From 71fda52067f55ae45cdf4e6efab77a9a022a10ea Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Fri, 13 Mar 2026 14:48:21 +0800 Subject: [PATCH 1/9] feat: add gcta modules --- modules/nf-core/gcta/addgrms/environment.yml | 7 + modules/nf-core/gcta/addgrms/main.nf | 37 +++++ modules/nf-core/gcta/addgrms/meta.yml | 81 +++++++++++ .../nf-core/gcta/addgrms/tests/main.nf.test | 79 +++++++++++ .../gcta/addgrms/tests/main.nf.test.snap | 75 ++++++++++ .../gcta/addgrms/tests/nextflow.config | 3 + .../nf-core/gcta/adjustgrm/environment.yml | 7 + modules/nf-core/gcta/adjustgrm/main.nf | 38 ++++++ modules/nf-core/gcta/adjustgrm/meta.yml | 85 ++++++++++++ .../nf-core/gcta/adjustgrm/tests/main.nf.test | 67 +++++++++ .../gcta/adjustgrm/tests/main.nf.test.snap | 75 ++++++++++ .../gcta/adjustgrm/tests/nextflow.config | 3 + .../gcta/bivariatereml/environment.yml | 7 + modules/nf-core/gcta/bivariatereml/main.nf | 47 +++++++ modules/nf-core/gcta/bivariatereml/meta.yml | 118 ++++++++++++++++ .../gcta/bivariatereml/tests/main.nf.test | 86 ++++++++++++ .../bivariatereml/tests/main.nf.test.snap | 85 ++++++++++++ .../gcta/bivariatereml/tests/nextflow.config | 3 + .../gcta/bivariateremlldms/environment.yml | 7 + .../nf-core/gcta/bivariateremlldms/main.nf | 49 +++++++ .../nf-core/gcta/bivariateremlldms/meta.yml | 115 ++++++++++++++++ .../gcta/bivariateremlldms/tests/main.nf.test | 92 +++++++++++++ .../bivariateremlldms/tests/main.nf.test.snap | 85 ++++++++++++ .../bivariateremlldms/tests/nextflow.config | 3 + .../gcta/calculateldscores/environment.yml | 7 + .../nf-core/gcta/calculateldscores/main.nf | 69 ++++++++++ .../nf-core/gcta/calculateldscores/meta.yml | 105 ++++++++++++++ .../gcta/calculateldscores/tests/main.nf.test | 74 ++++++++++ .../calculateldscores/tests/main.nf.test.snap | 102 ++++++++++++++ .../calculateldscores/tests/nextflow.config | 3 + modules/nf-core/gcta/fastgwa/environment.yml | 7 + modules/nf-core/gcta/fastgwa/main.nf | 54 ++++++++ modules/nf-core/gcta/fastgwa/meta.yml | 
128 ++++++++++++++++++ .../nf-core/gcta/fastgwa/tests/main.nf.test | 96 +++++++++++++ .../gcta/fastgwa/tests/main.nf.test.snap | 78 +++++++++++ .../gcta/fastgwa/tests/nextflow.config | 9 ++ .../gcta/filtergrmwithkeep/environment.yml | 7 + .../nf-core/gcta/filtergrmwithkeep/main.nf | 39 ++++++ .../nf-core/gcta/filtergrmwithkeep/meta.yml | 96 +++++++++++++ .../gcta/filtergrmwithkeep/tests/main.nf.test | 86 ++++++++++++ .../filtergrmwithkeep/tests/main.nf.test.snap | 75 ++++++++++ .../filtergrmwithkeep/tests/nextflow.config | 3 + .../nf-core/gcta/makebksparse/environment.yml | 7 + modules/nf-core/gcta/makebksparse/main.nf | 37 +++++ modules/nf-core/gcta/makebksparse/meta.yml | 84 ++++++++++++ .../gcta/makebksparse/tests/main.nf.test | 69 ++++++++++ .../gcta/makebksparse/tests/main.nf.test.snap | 72 ++++++++++ .../gcta/makebksparse/tests/nextflow.config | 3 + .../nf-core/gcta/makegrmpart/environment.yml | 7 + modules/nf-core/gcta/makegrmpart/main.nf | 47 +++++++ modules/nf-core/gcta/makegrmpart/meta.yml | 100 ++++++++++++++ .../gcta/makegrmpart/tests/main.nf.test | 87 ++++++++++++ .../gcta/makegrmpart/tests/main.nf.test.snap | 81 +++++++++++ .../gcta/makegrmpart/tests/nextflow.config | 3 + modules/nf-core/gcta/reml/environment.yml | 7 + modules/nf-core/gcta/reml/main.nf | 45 ++++++ modules/nf-core/gcta/reml/meta.yml | 105 ++++++++++++++ modules/nf-core/gcta/reml/tests/main.nf.test | 84 ++++++++++++ .../nf-core/gcta/reml/tests/main.nf.test.snap | 69 ++++++++++ .../nf-core/gcta/reml/tests/nextflow.config | 3 + modules/nf-core/gcta/remlldms/environment.yml | 7 + modules/nf-core/gcta/remlldms/main.nf | 44 ++++++ modules/nf-core/gcta/remlldms/meta.yml | 102 ++++++++++++++ .../nf-core/gcta/remlldms/tests/main.nf.test | 96 +++++++++++++ .../gcta/remlldms/tests/main.nf.test.snap | 69 ++++++++++ .../gcta/remlldms/tests/nextflow.config | 3 + .../removerelatedsubjects/environment.yml | 7 + .../gcta/removerelatedsubjects/main.nf | 39 ++++++ .../gcta/removerelatedsubjects/meta.yml 
| 96 +++++++++++++ .../removerelatedsubjects/tests/main.nf.test | 69 ++++++++++ .../tests/main.nf.test.snap | 99 ++++++++++++++ .../tests/nextflow.config | 3 + 72 files changed, 3836 insertions(+) create mode 100644 modules/nf-core/gcta/addgrms/environment.yml create mode 100644 modules/nf-core/gcta/addgrms/main.nf create mode 100644 modules/nf-core/gcta/addgrms/meta.yml create mode 100644 modules/nf-core/gcta/addgrms/tests/main.nf.test create mode 100644 modules/nf-core/gcta/addgrms/tests/main.nf.test.snap create mode 100644 modules/nf-core/gcta/addgrms/tests/nextflow.config create mode 100644 modules/nf-core/gcta/adjustgrm/environment.yml create mode 100644 modules/nf-core/gcta/adjustgrm/main.nf create mode 100644 modules/nf-core/gcta/adjustgrm/meta.yml create mode 100644 modules/nf-core/gcta/adjustgrm/tests/main.nf.test create mode 100644 modules/nf-core/gcta/adjustgrm/tests/main.nf.test.snap create mode 100644 modules/nf-core/gcta/adjustgrm/tests/nextflow.config create mode 100644 modules/nf-core/gcta/bivariatereml/environment.yml create mode 100644 modules/nf-core/gcta/bivariatereml/main.nf create mode 100644 modules/nf-core/gcta/bivariatereml/meta.yml create mode 100644 modules/nf-core/gcta/bivariatereml/tests/main.nf.test create mode 100644 modules/nf-core/gcta/bivariatereml/tests/main.nf.test.snap create mode 100644 modules/nf-core/gcta/bivariatereml/tests/nextflow.config create mode 100644 modules/nf-core/gcta/bivariateremlldms/environment.yml create mode 100644 modules/nf-core/gcta/bivariateremlldms/main.nf create mode 100644 modules/nf-core/gcta/bivariateremlldms/meta.yml create mode 100644 modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test create mode 100644 modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test.snap create mode 100644 modules/nf-core/gcta/bivariateremlldms/tests/nextflow.config create mode 100644 modules/nf-core/gcta/calculateldscores/environment.yml create mode 100644 modules/nf-core/gcta/calculateldscores/main.nf create 
mode 100644 modules/nf-core/gcta/calculateldscores/meta.yml create mode 100644 modules/nf-core/gcta/calculateldscores/tests/main.nf.test create mode 100644 modules/nf-core/gcta/calculateldscores/tests/main.nf.test.snap create mode 100644 modules/nf-core/gcta/calculateldscores/tests/nextflow.config create mode 100644 modules/nf-core/gcta/fastgwa/environment.yml create mode 100644 modules/nf-core/gcta/fastgwa/main.nf create mode 100644 modules/nf-core/gcta/fastgwa/meta.yml create mode 100644 modules/nf-core/gcta/fastgwa/tests/main.nf.test create mode 100644 modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap create mode 100644 modules/nf-core/gcta/fastgwa/tests/nextflow.config create mode 100644 modules/nf-core/gcta/filtergrmwithkeep/environment.yml create mode 100644 modules/nf-core/gcta/filtergrmwithkeep/main.nf create mode 100644 modules/nf-core/gcta/filtergrmwithkeep/meta.yml create mode 100644 modules/nf-core/gcta/filtergrmwithkeep/tests/main.nf.test create mode 100644 modules/nf-core/gcta/filtergrmwithkeep/tests/main.nf.test.snap create mode 100644 modules/nf-core/gcta/filtergrmwithkeep/tests/nextflow.config create mode 100644 modules/nf-core/gcta/makebksparse/environment.yml create mode 100644 modules/nf-core/gcta/makebksparse/main.nf create mode 100644 modules/nf-core/gcta/makebksparse/meta.yml create mode 100644 modules/nf-core/gcta/makebksparse/tests/main.nf.test create mode 100644 modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap create mode 100644 modules/nf-core/gcta/makebksparse/tests/nextflow.config create mode 100644 modules/nf-core/gcta/makegrmpart/environment.yml create mode 100644 modules/nf-core/gcta/makegrmpart/main.nf create mode 100644 modules/nf-core/gcta/makegrmpart/meta.yml create mode 100644 modules/nf-core/gcta/makegrmpart/tests/main.nf.test create mode 100644 modules/nf-core/gcta/makegrmpart/tests/main.nf.test.snap create mode 100644 modules/nf-core/gcta/makegrmpart/tests/nextflow.config create mode 100644 
modules/nf-core/gcta/reml/environment.yml create mode 100644 modules/nf-core/gcta/reml/main.nf create mode 100644 modules/nf-core/gcta/reml/meta.yml create mode 100644 modules/nf-core/gcta/reml/tests/main.nf.test create mode 100644 modules/nf-core/gcta/reml/tests/main.nf.test.snap create mode 100644 modules/nf-core/gcta/reml/tests/nextflow.config create mode 100644 modules/nf-core/gcta/remlldms/environment.yml create mode 100644 modules/nf-core/gcta/remlldms/main.nf create mode 100644 modules/nf-core/gcta/remlldms/meta.yml create mode 100644 modules/nf-core/gcta/remlldms/tests/main.nf.test create mode 100644 modules/nf-core/gcta/remlldms/tests/main.nf.test.snap create mode 100644 modules/nf-core/gcta/remlldms/tests/nextflow.config create mode 100644 modules/nf-core/gcta/removerelatedsubjects/environment.yml create mode 100644 modules/nf-core/gcta/removerelatedsubjects/main.nf create mode 100644 modules/nf-core/gcta/removerelatedsubjects/meta.yml create mode 100644 modules/nf-core/gcta/removerelatedsubjects/tests/main.nf.test create mode 100644 modules/nf-core/gcta/removerelatedsubjects/tests/main.nf.test.snap create mode 100644 modules/nf-core/gcta/removerelatedsubjects/tests/nextflow.config diff --git a/modules/nf-core/gcta/addgrms/environment.yml b/modules/nf-core/gcta/addgrms/environment.yml new file mode 100644 index 000000000000..3e22ea7b9f20 --- /dev/null +++ b/modules/nf-core/gcta/addgrms/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gcta=1.94.1 diff --git a/modules/nf-core/gcta/addgrms/main.nf b/modules/nf-core/gcta/addgrms/main.nf new file mode 100644 index 000000000000..95e13c207611 --- /dev/null +++ b/modules/nf-core/gcta/addgrms/main.nf @@ -0,0 +1,37 @@
+process GCTA_ADDGRMS { // Runs `gcta --mgrm <manifest> --make-grm` and emits the result as <meta.id>.grm.{id,bin,N.bin}
+    tag "${meta.id}"
+    label 'process_medium'
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/gcta:1.94.1--h9ee0642_0' :
+        'biocontainers/gcta:1.94.1--h9ee0642_0' }"
+
+    input: // mgrm_file is passed to --mgrm; grm_files are only staged, never named on the command line
+    tuple val(meta), path(mgrm_file), path(grm_files) // presumably the manifest lists prefixes that resolve against the staged grm_files - verify against caller
+
+    output: // NOTE(review): the committed snapshot records a row of asterisks as the version value, so head -n 1 appears to return the GCTA banner rule rather than a version number
+    tuple val(meta), path("${meta.id}.grm.id"), path("${meta.id}.grm.bin"), path("${meta.id}.grm.N.bin"), emit: combined_grm
+    tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | head -n 1"), emit: versions_gcta, topic: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: '' // extra CLI flags from task.ext.args; empty string when unset
+
+    """
+    gcta \\
+        --mgrm ${mgrm_file} \\
+        --make-grm \\
+        --out ${meta.id} \\
+        --thread-num ${task.cpus} \\
+        ${args}
+    """
+
+    stub: // creates empty placeholders with the same names as the declared outputs
+    """
+    touch ${meta.id}.grm.id
+    touch ${meta.id}.grm.bin
+    touch ${meta.id}.grm.N.bin
+    """
+}
diff --git a/modules/nf-core/gcta/addgrms/meta.yml b/modules/nf-core/gcta/addgrms/meta.yml new file mode 100644 index 000000000000..5e91cfd6cb9f --- /dev/null +++ b/modules/nf-core/gcta/addgrms/meta.yml @@ -0,0 +1,81 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "gcta_addgrms" +description: Combine multiple GRMs listed in an MGRM manifest into a single dense GRM +keywords: + - gcta + - grm + - genetics +tools: + - "gcta": + description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data." + homepage: "https://yanglab.westlake.edu.cn/software/gcta/" + documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf" + tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/" + +input: + - - meta: + type: map + description: | + Groovy map containing combined GRM metadata + e.g. 
`[ id:'plink_simulated' ]` + - mgrm_file: + type: file + description: MGRM manifest listing the GRM prefixes to combine + pattern: "*.mgrm" + ontologies: + - edam: "http://edamontology.org/format_2330" + - grm_files: + type: file + description: GRM sidecar files referenced by `mgrm_file` + pattern: "*" + ontologies: [] + +output: + combined_grm: + - - meta: + type: map + description: | + Groovy map containing combined GRM metadata + e.g. `[ id:'plink_simulated' ]` + - "${meta.id}.grm.id": + type: file + description: Combined GRM sample identifier file + pattern: "${meta.id}.grm.id" + ontologies: [] + - "${meta.id}.grm.bin": + type: file + description: Combined GRM binary matrix file + pattern: "${meta.id}.grm.bin" + ontologies: [] + - "${meta.id}.grm.N.bin": + type: file + description: Combined GRM sample-count matrix file + pattern: "${meta.id}.grm.N.bin" + ontologies: [] + versions_gcta: + - - "${task.process}": + type: string + description: The process the version was collected from + - "gcta": + type: string + description: The tool name + - "gcta --version 2>&1 | head -n 1": + type: eval + description: The command used to retrieve the GCTA version + +topics: + versions: + - - ${task.process}: + type: string + description: The process the version was collected from + - gcta: + type: string + description: The tool name + - gcta --version 2>&1 | head -n 1: + type: eval + description: The command used to retrieve the GCTA version + +authors: + - "@andongni" +maintainers: + - "@andongni" diff --git a/modules/nf-core/gcta/addgrms/tests/main.nf.test b/modules/nf-core/gcta/addgrms/tests/main.nf.test new file mode 100644 index 000000000000..b74521b5fea6 --- /dev/null +++ b/modules/nf-core/gcta/addgrms/tests/main.nf.test @@ -0,0 +1,79 @@ +nextflow_process { + + name "Test Process GCTA_ADDGRMS" + script "../main.nf" + process "GCTA_ADDGRMS" + + tag "modules" + tag "modules_nfcore" + tag "gcta" + tag "gcta/addgrms" + + test("homo_sapiens popgen - merge dense GRMs from 
mgrm") { + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:"plink_simulated_ldms" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms.mgrm", checkIfExists: true), + [ + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms1.grm.id", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms1.grm.bin", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms1.grm.N.bin", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms2.grm.id", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms2.grm.bin", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms2.grm.N.bin", checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.combined_grm.size() == 1 }, + { assert process.out.combined_grm.get(0).get(0).id == "plink_simulated_ldms" }, + { + assert snapshot( + process.out.combined_grm, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - merge dense GRMs from mgrm - stub") { + options "-stub" + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:"plink_simulated_ldms" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms.mgrm", checkIfExists: true), + [ + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms1.grm.id", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms1.grm.bin", checkIfExists: true), + file(params.modules_testdata_base_path + 
"genomics/homo_sapiens/popgen/plink_simulated_ldms1.grm.N.bin", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms2.grm.id", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms2.grm.bin", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms2.grm.N.bin", checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/gcta/addgrms/tests/main.nf.test.snap b/modules/nf-core/gcta/addgrms/tests/main.nf.test.snap new file mode 100644 index 000000000000..ef4571ed193a --- /dev/null +++ b/modules/nf-core/gcta/addgrms/tests/main.nf.test.snap @@ -0,0 +1,75 @@ +{ + "homo_sapiens popgen - merge dense GRMs from mgrm - stub": { + "content": [ + { + "0": [ + [ + { + "id": "plink_simulated_ldms" + }, + "plink_simulated_ldms.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_ldms.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_ldms.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "GCTA_ADDGRMS", + "gcta", + "*******************************************************************" + ] + ], + "combined_grm": [ + [ + { + "id": "plink_simulated_ldms" + }, + "plink_simulated_ldms.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_ldms.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_ldms.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_gcta": [ + [ + "GCTA_ADDGRMS", + "gcta", + "*******************************************************************" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T14:16:18.17102219" + }, + "homo_sapiens popgen - merge dense GRMs from mgrm": { + "content": [ + [ + [ + { + "id": 
"plink_simulated_ldms" + }, + "plink_simulated_ldms.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9", + "plink_simulated_ldms.grm.bin:md5,59a9d628e3fb4b9488244048c952b2ca", + "plink_simulated_ldms.grm.N.bin:md5,acaa43bbbf2253d392537a178ecf09a4" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_ADDGRMS", + "gcta", + "*******************************************************************" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T14:19:21.015064654" + } +} \ No newline at end of file diff --git a/modules/nf-core/gcta/addgrms/tests/nextflow.config b/modules/nf-core/gcta/addgrms/tests/nextflow.config new file mode 100644 index 000000000000..de31e0218829 --- /dev/null +++ b/modules/nf-core/gcta/addgrms/tests/nextflow.config @@ -0,0 +1,3 @@ +params { + modules_testdata_base_path = System.getenv("NF_MODULES_TESTDATA_BASE_PATH") ?: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/" +} diff --git a/modules/nf-core/gcta/adjustgrm/environment.yml b/modules/nf-core/gcta/adjustgrm/environment.yml new file mode 100644 index 000000000000..3e22ea7b9f20 --- /dev/null +++ b/modules/nf-core/gcta/adjustgrm/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gcta=1.94.1 diff --git a/modules/nf-core/gcta/adjustgrm/main.nf b/modules/nf-core/gcta/adjustgrm/main.nf new file mode 100644 index 000000000000..28d1493f242e --- /dev/null +++ b/modules/nf-core/gcta/adjustgrm/main.nf @@ -0,0 +1,38 @@
+process GCTA_ADJUSTGRM { // Runs `gcta --grm-adj 0 --make-grm` on one binary GRM and emits the result as <meta.id>_adj.grm.{id,bin,N.bin}
+    tag "${meta.id}"
+    label 'process_medium'
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/gcta:1.94.1--h9ee0642_0' :
+        'biocontainers/gcta:1.94.1--h9ee0642_0' }"
+
+    input: // the .grm.id / .grm.bin / .grm.N.bin files of a single GRM; assumes all three share one file-name prefix - TODO confirm
+    tuple val(meta), path(grm_id), path(grm_bin), path(grm_n_bin)
+
+    output: // NOTE(review): the committed snapshot records a row of asterisks as the version value, so head -n 1 appears to return the GCTA banner rule rather than a version number
+    tuple val(meta), path("${meta.id}_adj.grm.id"), path("${meta.id}_adj.grm.bin"), path("${meta.id}_adj.grm.N.bin"), emit: grm_files
+    tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | head -n 1"), emit: versions_gcta, topic: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: '' // extra CLI flags from task.ext.args; empty string when unset
+    def grm_prefix = grm_id.name.replaceFirst(/\.grm\.id$/, '') // inputs are staged under their own file names, so take the --grm prefix from the staged file instead of assuming it equals meta.id
+    """
+    gcta \\
+        --grm ${grm_prefix} \\
+        --grm-adj 0 \\
+        --make-grm \\
+        --out ${meta.id}_adj \\
+        --thread-num ${task.cpus} \\
+        ${args}
+    """
+
+    stub: // creates empty placeholders with the same names as the declared outputs
+    """
+    touch ${meta.id}_adj.grm.id
+    touch ${meta.id}_adj.grm.bin
+    touch ${meta.id}_adj.grm.N.bin
+    """
+}
diff --git a/modules/nf-core/gcta/adjustgrm/meta.yml b/modules/nf-core/gcta/adjustgrm/meta.yml new file mode 100644 index 000000000000..b191a8a03f32 --- /dev/null +++ b/modules/nf-core/gcta/adjustgrm/meta.yml @@ -0,0 +1,85 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "gcta_adjustgrm" +description: Adjust a dense GRM for incomplete tagging using `gcta --grm-adj` +keywords: + - gcta + - grm + - genetics +tools: + - "gcta": + description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data." + homepage: "https://yanglab.westlake.edu.cn/software/gcta/" + documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf" + tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/" + +input: + - - meta: + type: map + description: | + Groovy map containing dense GRM metadata + e.g. 
`[ id:'plink_simulated' ]` + - grm_id: + type: file + description: Dense GRM sample identifier file + pattern: "*.grm.id" + ontologies: [] + - grm_bin: + type: file + description: Dense GRM binary matrix file + pattern: "*.grm.bin" + ontologies: [] + - grm_n_bin: + type: file + description: Dense GRM sample-count matrix file + pattern: "*.grm.N.bin" + ontologies: [] + +output: + grm_files: + - - meta: + type: map + description: | + Groovy map containing dense GRM metadata + e.g. `[ id:'plink_simulated' ]` + - "${meta.id}_adj.grm.id": + type: file + description: Adjusted GRM sample identifier file + pattern: "${meta.id}_adj.grm.id" + ontologies: [] + - "${meta.id}_adj.grm.bin": + type: file + description: Adjusted GRM binary matrix file + pattern: "${meta.id}_adj.grm.bin" + ontologies: [] + - "${meta.id}_adj.grm.N.bin": + type: file + description: Adjusted GRM sample-count matrix file + pattern: "${meta.id}_adj.grm.N.bin" + ontologies: [] + versions_gcta: + - - "${task.process}": + type: string + description: The process the version was collected from + - "gcta": + type: string + description: The tool name + - "gcta --version 2>&1 | head -n 1": + type: eval + description: The command used to retrieve the GCTA version + +topics: + versions: + - - ${task.process}: + type: string + description: The process the version was collected from + - gcta: + type: string + description: The tool name + - gcta --version 2>&1 | head -n 1: + type: eval + description: The command used to retrieve the GCTA version + +authors: + - "@andongni" +maintainers: + - "@andongni" diff --git a/modules/nf-core/gcta/adjustgrm/tests/main.nf.test b/modules/nf-core/gcta/adjustgrm/tests/main.nf.test new file mode 100644 index 000000000000..64b805325d05 --- /dev/null +++ b/modules/nf-core/gcta/adjustgrm/tests/main.nf.test @@ -0,0 +1,67 @@ +nextflow_process { + + name "Test Process GCTA_ADJUSTGRM" + script "../main.nf" + process "GCTA_ADJUSTGRM" + + tag "modules" + tag "modules_nfcore" + tag "gcta" + 
tag "gcta/adjustgrm" + + test("homo_sapiens popgen - adjust dense GRM") { + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:"plink_simulated" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.id", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.bin", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.N.bin", checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.grm_files.size() == 1 }, + { assert process.out.grm_files.get(0).get(0).id == "plink_simulated" }, + { + assert snapshot( + process.out.grm_files, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - adjust dense GRM - stub") { + options "-stub" + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:"plink_simulated" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.id", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.bin", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.N.bin", checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/gcta/adjustgrm/tests/main.nf.test.snap b/modules/nf-core/gcta/adjustgrm/tests/main.nf.test.snap new file mode 100644 index 000000000000..a2d574cecdc2 --- /dev/null +++ b/modules/nf-core/gcta/adjustgrm/tests/main.nf.test.snap @@ -0,0 +1,75 @@ +{ + "homo_sapiens popgen - adjust dense GRM - stub": { + "content": [ + { + "0": [ + [ + { + "id": "plink_simulated" + }, + 
"plink_simulated_adj.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_adj.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_adj.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "GCTA_ADJUSTGRM", + "gcta", + "*******************************************************************" + ] + ], + "grm_files": [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_adj.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_adj.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_adj.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_gcta": [ + [ + "GCTA_ADJUSTGRM", + "gcta", + "*******************************************************************" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T13:52:09.111456691" + }, + "homo_sapiens popgen - adjust dense GRM": { + "content": [ + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_adj.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9", + "plink_simulated_adj.grm.bin:md5,6d16a365bd94b621963769e8314eeaa0", + "plink_simulated_adj.grm.N.bin:md5,acaa43bbbf2253d392537a178ecf09a4" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_ADJUSTGRM", + "gcta", + "*******************************************************************" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T14:19:40.424594248" + } +} \ No newline at end of file diff --git a/modules/nf-core/gcta/adjustgrm/tests/nextflow.config b/modules/nf-core/gcta/adjustgrm/tests/nextflow.config new file mode 100644 index 000000000000..de31e0218829 --- /dev/null +++ b/modules/nf-core/gcta/adjustgrm/tests/nextflow.config @@ -0,0 +1,3 @@ +params { + modules_testdata_base_path = System.getenv("NF_MODULES_TESTDATA_BASE_PATH") ?: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/" +} diff --git a/modules/nf-core/gcta/bivariatereml/environment.yml 
b/modules/nf-core/gcta/bivariatereml/environment.yml new file mode 100644 index 000000000000..3e22ea7b9f20 --- /dev/null +++ b/modules/nf-core/gcta/bivariatereml/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gcta=1.94.1 diff --git a/modules/nf-core/gcta/bivariatereml/main.nf b/modules/nf-core/gcta/bivariatereml/main.nf new file mode 100644 index 000000000000..51c1164aba7f --- /dev/null +++ b/modules/nf-core/gcta/bivariatereml/main.nf @@ -0,0 +1,47 @@
+process GCTA_BIVARIATEREML { // Runs `gcta --reml-bivar 1 2` against one binary GRM and emits <meta.id>.hsq and <meta.id>.log
+    tag "bivariate_reml_${meta.id}_${meta2.id}"
+    label 'process_medium'
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/gcta:1.94.1--h9ee0642_0' :
+        'biocontainers/gcta:1.94.1--h9ee0642_0' }"
+
+    input: // meta names the outputs; meta2 carries the GRM files; meta3 and meta4 carry optional covariate files
+    tuple val(meta), path(phenotype_file)
+    tuple val(meta2), path(grm_id), path(grm_bin), path(grm_n_bin)
+    tuple val(meta3), path(quant_covariates_file)
+    tuple val(meta4), path(cat_covariates_file)
+
+    output: // NOTE(review): sibling gcta snapshots in this patch record a row of asterisks as the version value, so head -n 1 appears to return the GCTA banner rule rather than a version number
+    tuple val(meta), path("${meta.id}.hsq"), emit: bivariate_results
+    tuple val(meta), path("${meta.id}.log"), emit: log_file
+    tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | head -n 1"), emit: versions_gcta, topic: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def pair_id = meta.id // output prefix; must stay equal to meta.id so the declared .hsq and .log outputs are found
+    def qcovar_param = quant_covariates_file ? "--qcovar ${quant_covariates_file}" : '' // flag is dropped when the covariate input is empty
+    def covar_param = cat_covariates_file ? "--covar ${cat_covariates_file}" : '' // flag is dropped when the covariate input is empty
+    def extra_args = task.ext.args ?: '' // extra CLI flags from task.ext.args; empty string when unset
+    def grm_prefix = grm_id.name.replaceFirst(/\.grm\.id$/, '') // inputs are staged under their own file names, so take the --grm prefix from the staged file instead of assuming it equals meta2.id
+    """
+    set -euo pipefail
+
+    gcta \\
+        --reml-bivar 1 2 \\
+        --grm "${grm_prefix}" \\
+        --pheno "${phenotype_file}" \\
+        ${qcovar_param} \\
+        ${covar_param} \\
+        --out "${pair_id}" \\
+        --thread-num ${task.cpus} ${extra_args}
+    """
+
+    stub: // creates empty placeholders with the same names as the declared outputs
+    """
+    touch "${meta.id}.hsq"
+    touch "${meta.id}.log"
+    """
+}
diff --git a/modules/nf-core/gcta/bivariatereml/meta.yml b/modules/nf-core/gcta/bivariatereml/meta.yml new file mode 100644 index 000000000000..e87314bc570b --- /dev/null +++ b/modules/nf-core/gcta/bivariatereml/meta.yml @@ -0,0 +1,118 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "gcta_bivariatereml" +description: Run bivariate REML analysis with a single dense GRM +keywords: + - gcta + - reml + - bivariate + - genetics +tools: + - "gcta": + description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data." + homepage: "https://yanglab.westlake.edu.cn/software/gcta/" + documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf" + tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/" +input: + - - meta: + type: map + description: | + Groovy map containing shared bivariate phenotype metadata + `meta.id` must contain both trait names, for example `trait1__trait2` + - phenotype_file: + type: file + description: Shared bivariate phenotype file passed to `--pheno` + pattern: "*.{phe,pheno,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" + - - meta2: + type: map + description: | + Groovy map containing dense GRM metadata + e.g. 
`[ id:'plink_simulated' ]` + - grm_id: + type: file + description: Dense GRM sample identifier file + pattern: "*.grm.id" + ontologies: [] + - grm_bin: + type: file + description: Dense GRM binary matrix file + pattern: "*.grm.bin" + ontologies: [] + - grm_n_bin: + type: file + description: Dense GRM sample-count matrix file + pattern: "*.grm.N.bin" + ontologies: [] + - - meta3: + type: map + description: | + Groovy map containing quantitative covariate metadata + e.g. `[ id:'covariates_quant' ]` + - quant_covariates_file: + type: file + description: Quantitative covariates file, pass `[]` when absent + pattern: "*.{covar,cov,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" + - - meta4: + type: map + description: | + Groovy map containing categorical covariate metadata + e.g. `[ id:'covariates_cat' ]` + - cat_covariates_file: + type: file + description: Categorical covariates file, pass `[]` when absent + pattern: "*.{covar,cov,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" +output: + bivariate_results: + - - meta: + type: map + description: | + Groovy map containing shared bivariate phenotype metadata + `meta.id` must contain both trait names, for example `trait1__trait2` + - "${meta.id}.hsq": + type: file + description: Bivariate REML result file + pattern: "${meta.id}.hsq" + ontologies: + - edam: "http://edamontology.org/format_2330" + log_file: + - - meta: + type: map + description: | + Groovy map containing shared bivariate phenotype metadata + `meta.id` must contain both trait names, for example `trait1__trait2` + - "${meta.id}.log": + type: file + description: Bivariate REML log file + pattern: "${meta.id}.log" + ontologies: + - edam: "http://edamontology.org/format_2330" + versions_gcta: + - - "${task.process}": + type: string + description: The process the version was collected from + - "gcta": + type: string + description: The tool name + - "gcta --version 2>&1 | head -n 1": + type: eval + description: The 
command used to retrieve the GCTA version +topics: + versions: + - - ${task.process}: + type: string + description: The process the version was collected from + - gcta: + type: string + description: The tool name + - gcta --version 2>&1 | head -n 1: + type: eval + description: The command used to retrieve the GCTA version +authors: + - "@andongni" +maintainers: + - "@andongni" diff --git a/modules/nf-core/gcta/bivariatereml/tests/main.nf.test b/modules/nf-core/gcta/bivariatereml/tests/main.nf.test new file mode 100644 index 000000000000..e1a7dba423c6 --- /dev/null +++ b/modules/nf-core/gcta/bivariatereml/tests/main.nf.test @@ -0,0 +1,86 @@ +nextflow_process { + name "Test Process GCTA_BIVARIATEREML" + script "../main.nf" + process "GCTA_BIVARIATEREML" + + tag "modules" + tag "modules_nfcore" + tag "gcta" + tag "gcta/bivariatereml" + + test("homo_sapiens popgen - bivariate phenotype with dense GRM and covariates") { + config "./nextflow.config" + when { + process { + """ + input[0] = [ + [ id:"Trait1__Trait2" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm_bivariate.noheader.txt", checkIfExists: true) + ] + input[1] = [ + [ id:"gcta_grm_0" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm_0.grm.id", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm_0.grm.bin", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm_0.grm.N.bin", checkIfExists: true) + ] + input[2] = [ + [ id:"covariates_quant" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm_covariates.quant.noheader.txt", checkIfExists: true) + ] + input[3] = [ + [ id:"covariates_cat" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm_covariates.cat.noheader.txt", checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
process.out.bivariate_results.size() == 1 }, + { assert process.out.log_file.size() == 1 }, + { assert process.out.bivariate_results.get(0).get(0).id == "Trait1__Trait2" }, + { assert file(process.out.log_file.get(0).get(1)).name == "Trait1__Trait2.log" }, + { assert file(process.out.log_file.get(0).get(1)).exists() }, + { + assert snapshot( + process.out.bivariate_results, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - bivariate phenotype with dense GRM - stub") { + options "-stub" + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:"Trait1__Trait2" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm_bivariate.noheader.txt", checkIfExists: true) + ] + input[1] = [ + [ id:"gcta_grm_0" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm_0.grm.id", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm_0.grm.bin", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm_0.grm.N.bin", checkIfExists: true) + ] + input[2] = [[ id:"covariates_quant" ], []] + input[3] = [[ id:"covariates_cat" ], []] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/gcta/bivariatereml/tests/main.nf.test.snap b/modules/nf-core/gcta/bivariatereml/tests/main.nf.test.snap new file mode 100644 index 000000000000..8eb6d074f34f --- /dev/null +++ b/modules/nf-core/gcta/bivariatereml/tests/main.nf.test.snap @@ -0,0 +1,85 @@ +{ + "homo_sapiens popgen - bivariate phenotype with dense GRM and covariates": { + "content": [ + [ + [ + { + "id": "Trait1__Trait2" + }, + "Trait1__Trait2.hsq:md5,8cca31b34b14613e781bc840040380ba" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_BIVARIATEREML", + "gcta", + 
"*******************************************************************" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T14:43:35.812149857" + }, + "homo_sapiens popgen - bivariate phenotype with dense GRM - stub": { + "content": [ + { + "0": [ + [ + { + "id": "Trait1__Trait2" + }, + "Trait1__Trait2.hsq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "Trait1__Trait2" + }, + "Trait1__Trait2.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + "GCTA_BIVARIATEREML", + "gcta", + "*******************************************************************" + ] + ], + "bivariate_results": [ + [ + { + "id": "Trait1__Trait2" + }, + "Trait1__Trait2.hsq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_file": [ + [ + { + "id": "Trait1__Trait2" + }, + "Trait1__Trait2.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_gcta": [ + [ + "GCTA_BIVARIATEREML", + "gcta", + "*******************************************************************" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T13:53:21.898185124" + } +} \ No newline at end of file diff --git a/modules/nf-core/gcta/bivariatereml/tests/nextflow.config b/modules/nf-core/gcta/bivariatereml/tests/nextflow.config new file mode 100644 index 000000000000..de31e0218829 --- /dev/null +++ b/modules/nf-core/gcta/bivariatereml/tests/nextflow.config @@ -0,0 +1,3 @@ +params { + modules_testdata_base_path = System.getenv("NF_MODULES_TESTDATA_BASE_PATH") ?: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/" +} diff --git a/modules/nf-core/gcta/bivariateremlldms/environment.yml b/modules/nf-core/gcta/bivariateremlldms/environment.yml new file mode 100644 index 000000000000..3e22ea7b9f20 --- /dev/null +++ b/modules/nf-core/gcta/bivariateremlldms/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: 
$schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::gcta=1.94.1
diff --git a/modules/nf-core/gcta/bivariateremlldms/main.nf b/modules/nf-core/gcta/bivariateremlldms/main.nf
new file mode 100644
index 000000000000..9de71dba4a2a
--- /dev/null
+++ b/modules/nf-core/gcta/bivariateremlldms/main.nf
@@ -0,0 +1,57 @@
+// Bivariate REML across the GRMs listed in an MGRM manifest (`gcta --reml-bivar --mgrm`).
+process GCTA_BIVARIATEREMLLDMS {
+    tag "bivariate_reml_ldms_${meta.id}_${meta2.id}"
+    label 'process_medium'
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/gcta:1.94.1--h9ee0642_0' :
+        'biocontainers/gcta:1.94.1--h9ee0642_0' }"
+
+    input:
+    tuple val(meta), path(phenotype_file)
+    // grm_files are only staged into the task directory; the command itself names mgrm_file alone
+    tuple val(meta2), path(mgrm_file), path(grm_files)
+    // Pass [] as the file for either covariate input to omit the matching flag
+    tuple val(meta3), path(quant_covariates_file)
+    tuple val(meta4), path(cat_covariates_file)
+
+    output:
+    tuple val(meta), path("${meta.id}.hsq"), emit: bivariate_results
+    tuple val(meta), path("${meta.id}.log"), emit: log_file
+    // NOTE(review): this module's snapshot stores a row of asterisks for this eval, i.e.
+    // `head -n 1` seems to capture the banner rule rather than a version string. Verify.
+    tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | head -n 1"), emit: versions_gcta, topic: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def out_prefix = meta.id
+    def extra_args = task.ext.args ?: ''
+    // An empty list is falsy, so absent covariates drop their flag entirely
+    def qcovar_param = quant_covariates_file ? "--qcovar '${quant_covariates_file}'" : ''
+    def covar_param = cat_covariates_file ? "--covar '${cat_covariates_file}'" : ''
+    // 500 stays the default cap, but a caller-supplied --reml-maxit must not be passed twice
+    def maxit_param = extra_args.contains('--reml-maxit') ? '' : '--reml-maxit 500'
+
+    """
+    set -euo pipefail
+
+    gcta \\
+        --reml-bivar 1 2 \\
+        --mgrm "${mgrm_file}" \\
+        --pheno "${phenotype_file}" \\
+        ${qcovar_param} \\
+        ${covar_param} \\
+        --reml-bivar-no-constrain \\
+        ${maxit_param} \\
+        --out "${out_prefix}" \\
+        --thread-num ${task.cpus} ${extra_args}
+    """
+
+    stub:
+    """
+    touch "${meta.id}.hsq"
+    touch "${meta.id}.log"
+    """
+}
diff --git a/modules/nf-core/gcta/bivariateremlldms/meta.yml b/modules/nf-core/gcta/bivariateremlldms/meta.yml
new file mode 100644
index 000000000000..03d3f8509889
--- /dev/null
+++ b/modules/nf-core/gcta/bivariateremlldms/meta.yml
@@ -0,0 +1,115 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "gcta_bivariateremlldms"
+description: Run bivariate REML-LDMS analysis with an MGRM manifest
+keywords:
+  - gcta
+  - reml
+  - bivariate
+  - ldms
+  - genetics
+tools:
+  - "gcta":
+      description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data."
+      homepage: "https://yanglab.westlake.edu.cn/software/gcta/"
+      documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf"
+      tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/"
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy map containing shared bivariate phenotype metadata
+          `meta.id` must contain both trait names, for example `trait1__trait2`
+    - phenotype_file:
+        type: file
+        description: Shared bivariate phenotype file passed to `--pheno`
+        pattern: "*.{phe,pheno,txt,tsv}"
+        ontologies:
+          - edam: "http://edamontology.org/format_3475"
+  - - meta2:
+        type: map
+        description: |
+          Groovy map containing MGRM metadata
+          e.g.
`[ id:'plink_simulated_ldms' ]` + - mgrm_file: + type: file + description: MGRM manifest file + pattern: "*.mgrm" + ontologies: + - edam: "http://edamontology.org/format_2330" + - grm_files: + type: file + description: GRM sidecar files referenced by `mgrm_file` + pattern: "*" + ontologies: [] + - - meta3: + type: map + description: | + Groovy map containing quantitative covariate metadata + e.g. `[ id:'covariates_quant' ]` + - quant_covariates_file: + type: file + description: Quantitative covariates file, pass `[]` when absent + pattern: "*.{covar,cov,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" + - - meta4: + type: map + description: | + Groovy map containing categorical covariate metadata + e.g. `[ id:'covariates_cat' ]` + - cat_covariates_file: + type: file + description: Categorical covariates file, pass `[]` when absent + pattern: "*.{covar,cov,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" +output: + bivariate_results: + - - meta: + type: map + description: | + Groovy map containing shared bivariate phenotype metadata + `meta.id` must contain both trait names, for example `trait1__trait2` + - "${meta.id}.hsq": + type: file + description: Bivariate REML-LDMS result file + pattern: "${meta.id}.hsq" + ontologies: + - edam: "http://edamontology.org/format_2330" + log_file: + - - meta: + type: map + description: | + Groovy map containing shared bivariate phenotype metadata + `meta.id` must contain both trait names, for example `trait1__trait2` + - "${meta.id}.log": + type: file + description: Bivariate REML-LDMS log file + pattern: "${meta.id}.log" + ontologies: + - edam: "http://edamontology.org/format_2330" + versions_gcta: + - - "${task.process}": + type: string + description: The process the version was collected from + - "gcta": + type: string + description: The tool name + - "gcta --version 2>&1 | head -n 1": + type: eval + description: The command used to retrieve the GCTA version +topics: + versions: + 
- - ${task.process}: + type: string + description: The process the version was collected from + - gcta: + type: string + description: The tool name + - gcta --version 2>&1 | head -n 1: + type: eval + description: The command used to retrieve the GCTA version +authors: + - "@andongni" +maintainers: + - "@andongni" diff --git a/modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test b/modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test new file mode 100644 index 000000000000..b0b456ba935a --- /dev/null +++ b/modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test @@ -0,0 +1,92 @@ +nextflow_process { + name "Test Process GCTA_BIVARIATEREMLLDMS" + script "../main.nf" + process "GCTA_BIVARIATEREMLLDMS" + + tag "modules" + tag "modules_nfcore" + tag "gcta" + tag "gcta/bivariateremlldms" + + test("homo_sapiens popgen - bivariate phenotype with ldms mgrm and covariates") { + config "./nextflow.config" + when { + process { + """ + input[0] = [ + [ id:"Trait1__Trait2" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm_bivariate.noheader.txt", checkIfExists: true) + ] + input[1] = [ + [ id:"gcta_bivariate_ldms" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_bivariate_ldms.mgrm", checkIfExists: true), + [ + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm.grm.id", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm.grm.bin", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm.grm.N.bin", checkIfExists: true) + ] + ] + input[2] = [ + [ id:"covariates_quant" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm_covariates.quant.noheader.txt", checkIfExists: true) + ] + input[3] = [ + [ id:"covariates_cat" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm_covariates.cat.noheader.txt", checkIfExists: true) 
+ ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.bivariate_results.size() == 1 }, + { assert process.out.log_file.size() == 1 }, + { assert process.out.bivariate_results.get(0).get(0).id == "Trait1__Trait2" }, + { assert file(process.out.log_file.get(0).get(1)).name == "Trait1__Trait2.log" }, + { assert file(process.out.log_file.get(0).get(1)).exists() }, + { + assert snapshot( + process.out.bivariate_results, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - bivariate phenotype with ldms mgrm - stub") { + options "-stub" + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:"Trait1__Trait2" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm_bivariate.noheader.txt", checkIfExists: true) + ] + input[1] = [ + [ id:"gcta_bivariate_ldms" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_bivariate_ldms.mgrm", checkIfExists: true), + [ + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm.grm.id", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm.grm.bin", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm.grm.N.bin", checkIfExists: true) + ] + ] + input[2] = [[ id:"covariates_quant" ], []] + input[3] = [[ id:"covariates_cat" ], []] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test.snap b/modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test.snap new file mode 100644 index 000000000000..057cb1baf43a --- /dev/null +++ b/modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test.snap @@ -0,0 +1,85 @@ +{ + "homo_sapiens popgen - bivariate phenotype with ldms mgrm and covariates": { + 
"content": [ + [ + [ + { + "id": "Trait1__Trait2" + }, + "Trait1__Trait2.hsq:md5,5cbdc47d28a46d61567a91003f32c579" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_BIVARIATEREMLLDMS", + "gcta", + "*******************************************************************" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T14:43:49.100858578" + }, + "homo_sapiens popgen - bivariate phenotype with ldms mgrm - stub": { + "content": [ + { + "0": [ + [ + { + "id": "Trait1__Trait2" + }, + "Trait1__Trait2.hsq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "Trait1__Trait2" + }, + "Trait1__Trait2.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + "GCTA_BIVARIATEREMLLDMS", + "gcta", + "*******************************************************************" + ] + ], + "bivariate_results": [ + [ + { + "id": "Trait1__Trait2" + }, + "Trait1__Trait2.hsq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_file": [ + [ + { + "id": "Trait1__Trait2" + }, + "Trait1__Trait2.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_gcta": [ + [ + "GCTA_BIVARIATEREMLLDMS", + "gcta", + "*******************************************************************" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T14:08:47.444308099" + } +} \ No newline at end of file diff --git a/modules/nf-core/gcta/bivariateremlldms/tests/nextflow.config b/modules/nf-core/gcta/bivariateremlldms/tests/nextflow.config new file mode 100644 index 000000000000..de31e0218829 --- /dev/null +++ b/modules/nf-core/gcta/bivariateremlldms/tests/nextflow.config @@ -0,0 +1,3 @@ +params { + modules_testdata_base_path = System.getenv("NF_MODULES_TESTDATA_BASE_PATH") ?: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/" +} diff --git a/modules/nf-core/gcta/calculateldscores/environment.yml b/modules/nf-core/gcta/calculateldscores/environment.yml new file mode 
100644
index 000000000000..3e22ea7b9f20
--- /dev/null
+++ b/modules/nf-core/gcta/calculateldscores/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::gcta=1.94.1
diff --git a/modules/nf-core/gcta/calculateldscores/main.nf b/modules/nf-core/gcta/calculateldscores/main.nf
new file mode 100644
index 000000000000..f26837acda89
--- /dev/null
+++ b/modules/nf-core/gcta/calculateldscores/main.nf
@@ -0,0 +1,85 @@
+// Compute regional LD scores with GCTA and split SNPs into four bins at the LD-score quartiles.
+process GCTA_CALCULATELDSCORES {
+    tag "${meta.id}"
+    label 'process_medium'
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/gcta:1.94.1--h9ee0642_0' :
+        'biocontainers/gcta:1.94.1--h9ee0642_0' }"
+
+    input:
+    tuple val(meta), path(bed), path(bim), path(fam)
+
+    output:
+    tuple val(meta), path("${meta.id}_gcta_ld.score.ld"), emit: ld_scores
+    tuple val(meta), path("${meta.id}_snp_group1.txt"), path("${meta.id}_snp_group2.txt"), path("${meta.id}_snp_group3.txt"), path("${meta.id}_snp_group4.txt"), emit: snp_group_files
+    // NOTE(review): this module's snapshot stores a row of asterisks for this eval, i.e.
+    // `head -n 1` seems to capture the banner rule rather than a version string. Verify.
+    tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | head -n 1"), emit: versions_gcta, topic: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def extra_args = task.ext.args ?: ''
+    // --bfile wants the fileset prefix; take it from the staged .bed so meta.id need not equal the file stem
+    def bfile_prefix = bed.baseName
+    def out_prefix = "${meta.id}_gcta_ld"
+
+    """
+    set -euo pipefail
+
+    gcta \\
+        --bfile "${bfile_prefix}" \\
+        --ld-score-region 200 \\
+        --out "${out_prefix}" \\
+        --thread-num ${task.cpus} ${extra_args}
+
+    ld_file="${out_prefix}.score.ld"
+    sorted_file="ldscore.sorted.tsv"
+
+    # Columns 1 and 8 of the score table (header skipped), ascending by column 8.
+    # NOTE(review): column 8 is presumably the ldscore column -- confirm against the output header.
+    awk 'NR > 1 { print \$1 "\\t" \$8 }' "\${ld_file}" | sort -k2,2n > "\${sorted_file}"
+
+    # 1-based row numbers of the three cut points in the sorted table
+    count=\$(wc -l < "\${sorted_file}")
+    q1_idx=\$(( (count + 3) / 4 ))
+    q2_idx=\$(( (count + 1) / 2 ))
+    q3_idx=\$(( (3 * count + 1) / 4 ))
+
+    q1=\$(awk -v idx="\${q1_idx}" 'NR == idx { print \$2 }' "\${sorted_file}")
+    q2=\$(awk -v idx="\${q2_idx}" 'NR == idx { print \$2 }' "\${sorted_file}")
+    q3=\$(awk -v idx="\${q3_idx}" 'NR == idx { print \$2 }' "\${sorted_file}")
+
+    # awk only creates a file on first write, so an empty bin would leave a declared
+    # output missing; start every group file empty.
+    for group in 1 2 3 4; do
+        : > "${meta.id}_snp_group\${group}.txt"
+    done
+
+    awk -v q1="\${q1}" -v q2="\${q2}" -v q3="\${q3}" -v prefix="${meta.id}" '
+        NR > 1 {
+            if (\$8 <= q1) {
+                bin = 1
+            } else if (\$8 <= q2) {
+                bin = 2
+            } else if (\$8 <= q3) {
+                bin = 3
+            } else {
+                bin = 4
+            }
+            print \$1 >> (prefix "_snp_group" bin ".txt")
+        }
+    ' "\${ld_file}"
+    """
+
+    stub:
+    """
+    touch "${meta.id}_gcta_ld.score.ld"
+    touch "${meta.id}_snp_group1.txt"
+    touch "${meta.id}_snp_group2.txt"
+    touch "${meta.id}_snp_group3.txt"
+    touch "${meta.id}_snp_group4.txt"
+    """
+}
diff --git a/modules/nf-core/gcta/calculateldscores/meta.yml b/modules/nf-core/gcta/calculateldscores/meta.yml
new file mode 100644
index 000000000000..16206208802d
--- /dev/null
+++ b/modules/nf-core/gcta/calculateldscores/meta.yml
@@ -0,0 +1,105 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "gcta_calculateldscores"
+description: Calculate LD scores with GCTA and segment variants into LD-based SNP groups
+keywords:
+  - gcta
+  - ld score
+  - ldms
+  - genetics
+tools:
+  - "gcta":
+      description: "GCTA is a tool for genome-wide complex trait analysis."
+      homepage: "https://yanglab.westlake.edu.cn/software/gcta/"
+      documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf"
+      tool_dev_url: "https://github.com/jianyangqt/gcta"
+      licence: ["GPL-3.0-only"]
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing PLINK1 genotype metadata
+          e.g.
`[ id:'plink_simulated' ]` + - bed: + type: file + description: PLINK1 genotype primary file + pattern: "*.{bed}" + ontologies: + - edam: "http://edamontology.org/format_3003" + - bim: + type: file + description: PLINK1 variant metadata file + pattern: "*.{bim}" + ontologies: [] + - fam: + type: file + description: PLINK1 sample metadata file + pattern: "*.{fam}" + ontologies: [] +output: + ld_scores: + - - meta: + type: map + description: | + Groovy Map containing PLINK1 genotype metadata + e.g. `[ id:'plink_simulated' ]` + - "${meta.id}_gcta_ld.score.ld": + type: file + description: GCTA LD score output file + pattern: "${meta.id}_gcta_ld.score.ld" + ontologies: + - edam: "http://edamontology.org/format_2330" + snp_group_files: + - - meta: + type: map + description: | + Groovy Map containing PLINK1 genotype metadata + e.g. `[ id:'plink_simulated' ]` + - "${meta.id}_snp_group1.txt": + type: file + description: First LD-derived SNP-group file + pattern: "${meta.id}_snp_group1.txt" + ontologies: + - edam: "http://edamontology.org/format_2330" + - "${meta.id}_snp_group2.txt": + type: file + description: Second LD-derived SNP-group file + pattern: "${meta.id}_snp_group2.txt" + ontologies: + - edam: "http://edamontology.org/format_2330" + - "${meta.id}_snp_group3.txt": + type: file + description: Third LD-derived SNP-group file + pattern: "${meta.id}_snp_group3.txt" + ontologies: + - edam: "http://edamontology.org/format_2330" + - "${meta.id}_snp_group4.txt": + type: file + description: Fourth LD-derived SNP-group file + pattern: "${meta.id}_snp_group4.txt" + ontologies: + - edam: "http://edamontology.org/format_2330" + versions_gcta: + - - "${task.process}": + type: string + description: The process the versions were collected from + - "gcta": + type: string + description: The tool name + - "gcta --version 2>&1 | head -n 1": + type: eval + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + 
description: The process the versions were collected from + - gcta: + type: string + description: The tool name + - gcta --version 2>&1 | head -n 1: + type: eval + description: The command used to generate the version of the tool +authors: + - "@andongni" +maintainers: + - "@andongni" diff --git a/modules/nf-core/gcta/calculateldscores/tests/main.nf.test b/modules/nf-core/gcta/calculateldscores/tests/main.nf.test new file mode 100644 index 000000000000..41704f0b1a54 --- /dev/null +++ b/modules/nf-core/gcta/calculateldscores/tests/main.nf.test @@ -0,0 +1,74 @@ +nextflow_process { + + name "Test Process GCTA_CALCULATELDSCORES" + script "../main.nf" + process "GCTA_CALCULATELDSCORES" + + tag "modules" + tag "modules_nfcore" + tag "gcta" + tag "gcta/calculateldscores" + + test("homo_sapiens popgen - chr01 plink1") { + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'chr01.vcf' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/chr01.vcf.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/chr01.vcf.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/chr01.vcf.fam', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.ld_scores.size() == 1 }, + { assert process.out.snp_group_files.size() == 1 }, + { assert process.out.ld_scores.get(0).get(0).id == 'chr01.vcf' }, + { + def snpGroups = process.out.snp_group_files.get(0) + assert snpGroups.get(0).id == 'chr01.vcf' + assert (1..4).every { idx -> path(snpGroups.get(idx)).exists() } + }, + { + assert snapshot( + process.out.ld_scores, + process.out.snp_group_files, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - chr01 plink1 - stub") { + options "-stub" + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ 
id:'chr01.vcf' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/chr01.vcf.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/chr01.vcf.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/chr01.vcf.fam', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/gcta/calculateldscores/tests/main.nf.test.snap b/modules/nf-core/gcta/calculateldscores/tests/main.nf.test.snap new file mode 100644 index 000000000000..346a46a6ed9b --- /dev/null +++ b/modules/nf-core/gcta/calculateldscores/tests/main.nf.test.snap @@ -0,0 +1,102 @@ +{ + "homo_sapiens popgen - chr01 plink1 - stub": { + "content": [ + { + "0": [ + [ + { + "id": "chr01.vcf" + }, + "chr01.vcf_gcta_ld.score.ld:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "chr01.vcf" + }, + "chr01.vcf_snp_group1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chr01.vcf_snp_group2.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chr01.vcf_snp_group3.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chr01.vcf_snp_group4.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + "GCTA_CALCULATELDSCORES", + "gcta", + "*******************************************************************" + ] + ], + "ld_scores": [ + [ + { + "id": "chr01.vcf" + }, + "chr01.vcf_gcta_ld.score.ld:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "snp_group_files": [ + [ + { + "id": "chr01.vcf" + }, + "chr01.vcf_snp_group1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chr01.vcf_snp_group2.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chr01.vcf_snp_group3.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chr01.vcf_snp_group4.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_gcta": [ + [ + "GCTA_CALCULATELDSCORES", + "gcta", + 
"*******************************************************************" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T14:16:34.838851258" + }, + "homo_sapiens popgen - chr01 plink1": { + "content": [ + [ + [ + { + "id": "chr01.vcf" + }, + "chr01.vcf_gcta_ld.score.ld:md5,374d4f55f66ff41d4d941ad181114205" + ] + ], + [ + [ + { + "id": "chr01.vcf" + }, + "chr01.vcf_snp_group1.txt:md5,e249439ac7f63ebaccf295b61e08cf7a", + "chr01.vcf_snp_group2.txt:md5,7061c4628a05c528355e9cb068b5cefb", + "chr01.vcf_snp_group3.txt:md5,60195886c40ab436fe623f4c7aa0e323", + "chr01.vcf_snp_group4.txt:md5,0f1cc15d0b496448ed0d55233a443181" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_CALCULATELDSCORES", + "gcta", + "*******************************************************************" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T14:16:26.216820259" + } +} \ No newline at end of file diff --git a/modules/nf-core/gcta/calculateldscores/tests/nextflow.config b/modules/nf-core/gcta/calculateldscores/tests/nextflow.config new file mode 100644 index 000000000000..76b9ab148074 --- /dev/null +++ b/modules/nf-core/gcta/calculateldscores/tests/nextflow.config @@ -0,0 +1,3 @@ +params { + modules_testdata_base_path = System.getenv('NF_MODULES_TESTDATA_BASE_PATH') ?: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' +} diff --git a/modules/nf-core/gcta/fastgwa/environment.yml b/modules/nf-core/gcta/fastgwa/environment.yml new file mode 100644 index 000000000000..3e22ea7b9f20 --- /dev/null +++ b/modules/nf-core/gcta/fastgwa/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gcta=1.94.1 diff --git a/modules/nf-core/gcta/fastgwa/main.nf b/modules/nf-core/gcta/fastgwa/main.nf new file mode 100644 
index 000000000000..c488883ea0b7
--- /dev/null
+++ b/modules/nf-core/gcta/fastgwa/main.nf
@@ -0,0 +1,61 @@
+// fastGWA association test over a PLINK1 (.bed) or PLINK2 (.pgen) fileset with a sparse GRM.
+process GCTA_FASTGWA {
+    tag "${meta.id}:${meta3.id}"
+    label 'process_medium'
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/gcta:1.94.1--h9ee0642_0' :
+        'biocontainers/gcta:1.94.1--h9ee0642_0' }"
+
+    input:
+    tuple val(meta), path(bed_pgen), path(bim_pvar), path(fam_psam)
+    tuple val(meta2), path(sparse_grm_id), path(sparse_grm_sp)
+    // meta3.mpheno, when set, is forwarded as --mpheno
+    tuple val(meta3), path(phenotype_file)
+    // Pass [] as the file for either covariate input to omit the matching flag
+    tuple val(meta4), path(quant_covariates_file)
+    tuple val(meta5), path(cat_covariates_file)
+
+    output:
+    tuple val(meta), val(meta3), path("${meta.id}_${meta3.id}.fastGWA"), emit: results
+    // NOTE(review): snapshots of sibling gcta modules using this same eval store a row of
+    // asterisks (the first banner line) instead of a version string. Presumably the same here; verify.
+    tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | head -n 1"), emit: versions_gcta, topic: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def extra_args = task.ext.args ?: ''
+    def qcovar_arg = quant_covariates_file ? "--qcovar '${quant_covariates_file}'" : ''
+    def covar_arg = cat_covariates_file ? "--covar '${cat_covariates_file}'" : ''
+    def mpheno_arg = meta3.mpheno ? "--mpheno ${meta3.mpheno}" : ''
+    // Both prefixes come from the staged file names, so neither meta id has to match a file stem
+    def grm_prefix = sparse_grm_id.name.replaceFirst(/\.grm\.id$/, '')
+    def genotype_prefix = bed_pgen.name.replaceFirst(/\.(bed|pgen)$/, '')
+    def genotype_flag = bed_pgen.name.endsWith('.pgen') ? '--pfile' : '--bfile'
+    def out = "${meta.id}_${meta3.id}"
+    // Default to --fastGWA-mlm unless ext.args already selects an analysis mode
+    def mode_given = extra_args.tokenize().any { arg -> arg ==~ /--fastGWA-(mlm(-exact|-binary)?|lr)/ }
+    def mode_arg = mode_given ? '' : '--fastGWA-mlm'
+
+    """
+    set -euo pipefail
+
+    gcta \\
+        ${genotype_flag} "${genotype_prefix}" \\
+        --grm-sparse "${grm_prefix}" \\
+        ${mode_arg} \\
+        --pheno "${phenotype_file}" \\
+        ${qcovar_arg} \\
+        ${covar_arg} \\
+        ${mpheno_arg} \\
+        --thread-num ${task.cpus} \\
+        --out "${out}" ${extra_args}
+    """
+
+    stub:
+    """
+    touch "${meta.id}_${meta3.id}.fastGWA"
+    """
+}
diff --git a/modules/nf-core/gcta/fastgwa/meta.yml b/modules/nf-core/gcta/fastgwa/meta.yml
new file mode 100644
index 000000000000..ab66bad26a32
--- /dev/null
+++ b/modules/nf-core/gcta/fastgwa/meta.yml
@@ -0,0 +1,128 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "gcta_fastgwa"
+description: Run GCTA fastGWA-MLM with PLINK genotype inputs and a sparse GRM
+keywords:
+  - gcta
+  - fastgwa
+  - gwas
+  - genetics
+tools:
+  - "gcta":
+      description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data."
+      homepage: "https://yanglab.westlake.edu.cn/software/gcta/"
+      documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf"
+      tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/"
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy map containing PLINK genotype metadata
+          e.g. `[ id:'plink_simulated' ]`
+    - bed_pgen:
+        type: file
+        description: PLINK primary genotype file, either `.bed` or `.pgen`
+        pattern: "*.{bed,pgen}"
+        ontologies:
+          - edam: "http://edamontology.org/format_3003"
+    - bim_pvar:
+        type: file
+        description: PLINK sidecar file, either `.bim` or `.pvar`
+        pattern: "*.{bim,pvar}"
+        ontologies: []
+    - fam_psam:
+        type: file
+        description: PLINK sidecar file, either `.fam` or `.psam`
+        pattern: "*.{fam,psam}"
+        ontologies: []
+  - - meta2:
+        type: map
+        description: |
+          Groovy map containing sparse GRM metadata
+          e.g.
`[ id:'plink_simulated_sp' ]` + - sparse_grm_id: + type: file + description: Sparse GRM ID file (`.grm.id`) + pattern: "*.grm.id" + ontologies: [] + - sparse_grm_sp: + type: file + description: Sparse GRM sparse matrix file (`.grm.sp`) + pattern: "*.grm.sp" + ontologies: [] + - - meta3: + type: map + description: | + Groovy map containing phenotype metadata + e.g. `[ id:'QuantitativeTrait' ]` + Optional phenotype selector may be supplied as `meta3.mpheno` + - phenotype_file: + type: file + description: Phenotype file + pattern: "*.{phe,pheno,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" + - - meta4: + type: map + description: | + Groovy map containing quantitative covariate metadata + e.g. `[ id:'covariates_quant' ]` + - quant_covariates_file: + type: file + description: Quantitative covariates file, pass `[]` when absent + pattern: "*.{covar,cov,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" + - - meta5: + type: map + description: | + Groovy map containing categorical covariate metadata + e.g. `[ id:'covariates_cat' ]` + - cat_covariates_file: + type: file + description: Categorical covariates file, pass `[]` when absent + pattern: "*.{covar,cov,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" +output: + results: + - - meta: + type: map + description: | + Groovy map containing PLINK genotype metadata + e.g. `[ id:'plink_simulated' ]` + - meta3: + type: map + description: | + Groovy map containing phenotype metadata + e.g. 
`[ id:'QuantitativeTrait' ]` + - "${meta.id}_${meta3.id}.fastGWA": + type: file + description: FastGWA association results + pattern: "${meta.id}_${meta3.id}.fastGWA" + ontologies: + - edam: "http://edamontology.org/format_2330" + versions_gcta: + - - "${task.process}": + type: string + description: The process the version was collected from + - "gcta": + type: string + description: The tool name + - "gcta --version 2>&1 | head -n 1": + type: eval + description: The command used to retrieve the GCTA version +topics: + versions: + - - ${task.process}: + type: string + description: The process the version was collected from + - gcta: + type: string + description: The tool name + - gcta --version 2>&1 | head -n 1: + type: eval + description: The command used to retrieve the GCTA version +authors: + - "@andongni" +maintainers: + - "@andongni" diff --git a/modules/nf-core/gcta/fastgwa/tests/main.nf.test b/modules/nf-core/gcta/fastgwa/tests/main.nf.test new file mode 100644 index 000000000000..016019df9630 --- /dev/null +++ b/modules/nf-core/gcta/fastgwa/tests/main.nf.test @@ -0,0 +1,96 @@ +nextflow_process { + + name "Test Process GCTA_FASTGWA" + script "../main.nf" + process "GCTA_FASTGWA" + + tag "modules" + tag "modules_nfcore" + tag "gcta" + tag "gcta/fastgwa" + + test("homo_sapiens popgen - plink1 with sparse GRM and covariates") { + config "./nextflow.config" + when { + process { + """ + input[0] = [ + [ id:"plink_simulated" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bed", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bim", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.fam", checkIfExists: true) + ] + input[1] = [ + [ id:"plink_simulated_sp" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_sp.grm.id", checkIfExists: true), + 
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_sp.grm.sp", checkIfExists: true) + ] + input[2] = [ + [ id:"QuantitativeTrait" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_quantitative.noheader.txt", checkIfExists: true) + ] + input[3] = [ + [ id:"covariates_quant" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_covariates.quant.noheader.txt", checkIfExists: true) + ] + input[4] = [ + [ id:"covariates_cat" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_covariates.cat.noheader.txt", checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.results.size() == 1 }, + { assert process.out.results.get(0).get(0).id == "plink_simulated" }, + { assert process.out.results.get(0).get(1).id == "QuantitativeTrait" }, + { assert path(process.out.results.get(0).get(2)).fileName.toString() == "plink_simulated_QuantitativeTrait.fastGWA" }, + { + assert snapshot( + process.out.results, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - plink2 with sparse GRM - stub") { + options "-stub" + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:"plink_simulated" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.pgen", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.pvar", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.psam", checkIfExists: true) + ] + input[1] = [ + [ id:"plink_simulated_sp" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_sp.grm.id", checkIfExists: true), + file(params.modules_testdata_base_path + 
"genomics/homo_sapiens/popgen/plink_simulated_sp.grm.sp", checkIfExists: true) + ] + input[2] = [ + [ id:"QuantitativeTrait" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_quantitative.noheader.txt", checkIfExists: true) + ] + input[3] = [[ id:"covariates_quant" ], []] + input[4] = [[ id:"covariates_cat" ], []] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap b/modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap new file mode 100644 index 000000000000..48c9ca0e7b23 --- /dev/null +++ b/modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap @@ -0,0 +1,78 @@ +{ + "homo_sapiens popgen - plink2 with sparse GRM - stub": { + "content": [ + { + "0": [ + [ + { + "id": "plink_simulated" + }, + { + "id": "QuantitativeTrait" + }, + "plink_simulated_QuantitativeTrait.fastGWA:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "GCTA_FASTGWA", + "gcta", + "*******************************************************************" + ] + ], + "results": [ + [ + { + "id": "plink_simulated" + }, + { + "id": "QuantitativeTrait" + }, + "plink_simulated_QuantitativeTrait.fastGWA:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_gcta": [ + [ + "GCTA_FASTGWA", + "gcta", + "*******************************************************************" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T14:13:39.722374131" + }, + "homo_sapiens popgen - plink1 with sparse GRM and covariates": { + "content": [ + [ + [ + { + "id": "plink_simulated" + }, + { + "id": "QuantitativeTrait" + }, + "plink_simulated_QuantitativeTrait.fastGWA:md5,09d7c5ed57c214e91bbb21fd49f0eadc" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_FASTGWA", + "gcta", + "*******************************************************************" + ] + ] + } + ], + "meta": { + "nf-test": 
"0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T14:20:29.849273904" + } +} \ No newline at end of file diff --git a/modules/nf-core/gcta/fastgwa/tests/nextflow.config b/modules/nf-core/gcta/fastgwa/tests/nextflow.config new file mode 100644 index 000000000000..71a0143df3e3 --- /dev/null +++ b/modules/nf-core/gcta/fastgwa/tests/nextflow.config @@ -0,0 +1,9 @@ +params { + modules_testdata_base_path = System.getenv("NF_MODULES_TESTDATA_BASE_PATH") ?: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/" +} + +process { + withName: "GCTA_FASTGWA" { + ext.args = { "--fastGWA-mlm-exact" } + } +} diff --git a/modules/nf-core/gcta/filtergrmwithkeep/environment.yml b/modules/nf-core/gcta/filtergrmwithkeep/environment.yml new file mode 100644 index 000000000000..3e22ea7b9f20 --- /dev/null +++ b/modules/nf-core/gcta/filtergrmwithkeep/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gcta=1.94.1
diff --git a/modules/nf-core/gcta/filtergrmwithkeep/main.nf b/modules/nf-core/gcta/filtergrmwithkeep/main.nf
new file mode 100644
index 000000000000..f06fc5968a53
--- /dev/null
+++ b/modules/nf-core/gcta/filtergrmwithkeep/main.nf
@@ -0,0 +1,49 @@
+// Subsets a dense GRM to the individuals listed in a keep file and writes the
+// result as <meta.id>_unrel.grm.id / .grm.bin / .grm.N.bin.
+process GCTA_FILTERGRMWITHKEEP {
+    tag "${meta.id}"
+    label 'process_medium'
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/gcta:1.94.1--h9ee0642_0' :
+        'biocontainers/gcta:1.94.1--h9ee0642_0' }"
+
+    input:
+    // Dense GRM files sharing one prefix (<prefix>.grm.id/.grm.bin/.grm.N.bin)
+    tuple val(meta), path(grm_id), path(grm_bin), path(grm_n_bin)
+    // Keep file listing the individuals to retain
+    tuple val(meta2), path(keep_file)
+
+    output:
+    tuple val(meta), path("${meta.id}_unrel.grm.id"), path("${meta.id}_unrel.grm.bin"), path("${meta.id}_unrel.grm.N.bin"), emit: filtered_grm
+    // NOTE(review): the committed test snapshot records a row of asterisks for
+    // this eval, so `head -n 1` appears to capture the banner border, not a
+    // version number. Fixing it means updating meta.yml and the snapshot too.
+    tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | head -n 1"), emit: versions_gcta, topic: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    // Resolve the input prefix from the staged file instead of meta.id so a
+    // GRM whose file name differs from the sample id is still found.
+    def grm_prefix = grm_id.name.replaceFirst(/\.grm\.id$/, '')
+
+    """
+    gcta \\
+        --grm ${grm_prefix} \\
+        --keep ${keep_file} \\
+        --make-grm \\
+        --out ${meta.id}_unrel \\
+        --thread-num ${task.cpus} \\
+        ${args}
+    """
+
+    stub:
+    """
+    touch ${meta.id}_unrel.grm.id
+    touch ${meta.id}_unrel.grm.bin
+    touch ${meta.id}_unrel.grm.N.bin
+    """
+}
diff --git a/modules/nf-core/gcta/filtergrmwithkeep/meta.yml b/modules/nf-core/gcta/filtergrmwithkeep/meta.yml new file mode 100644 index 000000000000..e7dfa3fe898f --- /dev/null +++ b/modules/nf-core/gcta/filtergrmwithkeep/meta.yml @@ -0,0 +1,96 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "gcta_filtergrmwithkeep" +description: Filter a dense GRM to the individuals listed in a keep file +keywords: + - gcta + - grm + - genetics +tools: + - "gcta": + description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data." + homepage: "https://yanglab.westlake.edu.cn/software/gcta/" + documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf" + tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/" + +input: + - - meta: + type: map + description: | + Groovy map containing dense GRM metadata + e.g.
`[ id:'plink_simulated' ]` + - grm_id: + type: file + description: Dense GRM sample identifier file + pattern: "*.grm.id" + ontologies: [] + - grm_bin: + type: file + description: Dense GRM binary matrix file + pattern: "*.grm.bin" + ontologies: [] + - grm_n_bin: + type: file + description: Dense GRM sample-count matrix file + pattern: "*.grm.N.bin" + ontologies: [] + - - meta2: + type: map + description: | + Groovy map containing keep-file metadata + e.g. `[ id:'plink_simulated_keep' ]` + - keep_file: + type: file + description: Keep file listing the individuals to retain + pattern: "*.{keep,txt,id}" + ontologies: + - edam: "http://edamontology.org/format_2330" + +output: + filtered_grm: + - - meta: + type: map + description: | + Groovy map containing dense GRM metadata + e.g. `[ id:'plink_simulated' ]` + - "${meta.id}_unrel.grm.id": + type: file + description: Filtered GRM sample identifier file + pattern: "${meta.id}_unrel.grm.id" + ontologies: [] + - "${meta.id}_unrel.grm.bin": + type: file + description: Filtered GRM binary matrix file + pattern: "${meta.id}_unrel.grm.bin" + ontologies: [] + - "${meta.id}_unrel.grm.N.bin": + type: file + description: Filtered GRM sample-count matrix file + pattern: "${meta.id}_unrel.grm.N.bin" + ontologies: [] + versions_gcta: + - - "${task.process}": + type: string + description: The process the version was collected from + - "gcta": + type: string + description: The tool name + - "gcta --version 2>&1 | head -n 1": + type: eval + description: The command used to retrieve the GCTA version + +topics: + versions: + - - ${task.process}: + type: string + description: The process the version was collected from + - gcta: + type: string + description: The tool name + - gcta --version 2>&1 | head -n 1: + type: eval + description: The command used to retrieve the GCTA version + +authors: + - "@andongni" +maintainers: + - "@andongni" diff --git a/modules/nf-core/gcta/filtergrmwithkeep/tests/main.nf.test 
b/modules/nf-core/gcta/filtergrmwithkeep/tests/main.nf.test new file mode 100644 index 000000000000..7e8be4435e75 --- /dev/null +++ b/modules/nf-core/gcta/filtergrmwithkeep/tests/main.nf.test @@ -0,0 +1,86 @@ +nextflow_process { + + name "Test Process GCTA_FILTERGRMWITHKEEP" + script "../main.nf" + process "GCTA_FILTERGRMWITHKEEP" + + tag "modules" + tag "modules_nfcore" + tag "gcta" + tag "gcta/filtergrmwithkeep" + tag "gcta/removerelatedsubjects" + + setup { + run("GCTA_REMOVERELATEDSUBJECTS", alias: "GCTA_REMOVERELATEDSUBJECTS_KEEP") { + script "../../removerelatedsubjects/main.nf" + process { + """ + input[0] = [ + [ id:"plink_simulated" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.id", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.bin", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.N.bin", checkIfExists: true) + ] + """ + } + } + } + + test("homo_sapiens popgen - filter dense GRM with keep file") { + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:"plink_simulated" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.id", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.bin", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.N.bin", checkIfExists: true) + ] + input[1] = GCTA_REMOVERELATEDSUBJECTS_KEEP.out.keep_file + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.filtered_grm.size() == 1 }, + { assert process.out.filtered_grm.get(0).get(0).id == "plink_simulated" }, + { + assert snapshot( + process.out.filtered_grm, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens 
popgen - filter dense GRM with keep file - stub") { + options "-stub" + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:"plink_simulated" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.id", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.bin", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.N.bin", checkIfExists: true) + ] + input[1] = GCTA_REMOVERELATEDSUBJECTS_KEEP.out.keep_file + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/gcta/filtergrmwithkeep/tests/main.nf.test.snap b/modules/nf-core/gcta/filtergrmwithkeep/tests/main.nf.test.snap new file mode 100644 index 000000000000..9d98baf17027 --- /dev/null +++ b/modules/nf-core/gcta/filtergrmwithkeep/tests/main.nf.test.snap @@ -0,0 +1,75 @@ +{ + "homo_sapiens popgen - filter dense GRM with keep file": { + "content": [ + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_unrel.grm.id:md5,ca8c0bded6951fdd3bf0dddc97b6df6b", + "plink_simulated_unrel.grm.bin:md5,b1f124463eecbae86840a6651eec372d", + "plink_simulated_unrel.grm.N.bin:md5,06b73ea8bae8f1e5f5d4de33dbd2c75e" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_FILTERGRMWITHKEEP", + "gcta", + "*******************************************************************" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T14:20:44.100270115" + }, + "homo_sapiens popgen - filter dense GRM with keep file - stub": { + "content": [ + { + "0": [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_unrel.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_unrel.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_unrel.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + 
"1": [ + [ + "GCTA_FILTERGRMWITHKEEP", + "gcta", + "*******************************************************************" + ] + ], + "filtered_grm": [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_unrel.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_unrel.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_unrel.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_gcta": [ + [ + "GCTA_FILTERGRMWITHKEEP", + "gcta", + "*******************************************************************" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T14:14:02.867340463" + } +} \ No newline at end of file diff --git a/modules/nf-core/gcta/filtergrmwithkeep/tests/nextflow.config b/modules/nf-core/gcta/filtergrmwithkeep/tests/nextflow.config new file mode 100644 index 000000000000..de31e0218829 --- /dev/null +++ b/modules/nf-core/gcta/filtergrmwithkeep/tests/nextflow.config @@ -0,0 +1,3 @@ +params { + modules_testdata_base_path = System.getenv("NF_MODULES_TESTDATA_BASE_PATH") ?: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/" +} diff --git a/modules/nf-core/gcta/makebksparse/environment.yml b/modules/nf-core/gcta/makebksparse/environment.yml new file mode 100644 index 000000000000..3e22ea7b9f20 --- /dev/null +++ b/modules/nf-core/gcta/makebksparse/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gcta=1.94.1
diff --git a/modules/nf-core/gcta/makebksparse/main.nf b/modules/nf-core/gcta/makebksparse/main.nf
new file mode 100644
index 000000000000..ac8195303e08
--- /dev/null
+++ b/modules/nf-core/gcta/makebksparse/main.nf
@@ -0,0 +1,47 @@
+// Converts a dense GRM into a sparse GRM with `--make-bK-sparse <cutoff>` and
+// writes the result as <meta.id>_sp.grm.id / <meta.id>_sp.grm.sp.
+process GCTA_MAKEBKSPARSE {
+    tag "${meta.id}"
+    label 'process_medium'
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/gcta:1.94.1--h9ee0642_0' :
+        'biocontainers/gcta:1.94.1--h9ee0642_0' }"
+
+    input:
+    // Dense GRM files sharing one prefix (<prefix>.grm.id/.grm.bin/.grm.N.bin)
+    tuple val(meta), path(grm_id), path(grm_bin), path(grm_n_bin)
+    // Value forwarded unchanged to `--make-bK-sparse`
+    val(cutoff)
+
+    output:
+    tuple val(meta), path("${meta.id}_sp.grm.id"), path("${meta.id}_sp.grm.sp"), emit: sparse_grm_files
+    // NOTE(review): the committed test snapshot records a row of asterisks for
+    // this eval, so `head -n 1` appears to capture the banner border, not a
+    // version number. Fixing it means updating meta.yml and the snapshot too.
+    tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | head -n 1"), emit: versions_gcta, topic: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    // Resolve the input prefix from the staged file instead of meta.id so a
+    // GRM whose file name differs from the sample id is still found.
+    def grm_prefix = grm_id.name.replaceFirst(/\.grm\.id$/, '')
+
+    """
+    gcta \\
+        --grm ${grm_prefix} \\
+        --make-bK-sparse ${cutoff} \\
+        --out ${meta.id}_sp \\
+        --thread-num ${task.cpus} \\
+        ${args}
+    """
+
+    stub:
+    """
+    touch ${meta.id}_sp.grm.id
+    touch ${meta.id}_sp.grm.sp
+    """
+}
diff --git a/modules/nf-core/gcta/makebksparse/meta.yml b/modules/nf-core/gcta/makebksparse/meta.yml new file mode 100644 index 000000000000..dc17568d739f --- /dev/null +++ b/modules/nf-core/gcta/makebksparse/meta.yml @@ -0,0 +1,84 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "gcta_makebksparse" +description: Create a sparse GRM from a dense GRM for downstream fastGWA analyses +keywords: + - gcta + - grm + - sparse + - genetics +tools: + - "gcta": + description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data." + homepage: "https://yanglab.westlake.edu.cn/software/gcta/" + documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf" + tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/" + +input: + - - meta: + type: map + description: | + Groovy map containing dense GRM metadata + e.g.
`[ id:'plink_simulated' ]` + - grm_id: + type: file + description: Dense GRM sample identifier file + pattern: "*.grm.id" + ontologies: [] + - grm_bin: + type: file + description: Dense GRM binary matrix file + pattern: "*.grm.bin" + ontologies: [] + - grm_n_bin: + type: file + description: Dense GRM sample-count matrix file + pattern: "*.grm.N.bin" + ontologies: [] + - cutoff: + type: float + description: Sparse GRM cutoff passed to `--make-bK-sparse` + +output: + sparse_grm_files: + - - meta: + type: map + description: | + Groovy map containing dense GRM metadata + e.g. `[ id:'plink_simulated' ]` + - "${meta.id}_sp.grm.id": + type: file + description: Sparse GRM sample identifier file + pattern: "${meta.id}_sp.grm.id" + ontologies: [] + - "${meta.id}_sp.grm.sp": + type: file + description: Sparse GRM matrix file + pattern: "${meta.id}_sp.grm.sp" + ontologies: [] + versions_gcta: + - - "${task.process}": + type: string + description: The process the version was collected from + - "gcta": + type: string + description: The tool name + - "gcta --version 2>&1 | head -n 1": + type: eval + description: The command used to retrieve the GCTA version + +topics: + versions: + - - ${task.process}: + type: string + description: The process the version was collected from + - gcta: + type: string + description: The tool name + - gcta --version 2>&1 | head -n 1: + type: eval + description: The command used to retrieve the GCTA version + +authors: + - "@andongni" +maintainers: + - "@andongni" diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test b/modules/nf-core/gcta/makebksparse/tests/main.nf.test new file mode 100644 index 000000000000..129dce09da5a --- /dev/null +++ b/modules/nf-core/gcta/makebksparse/tests/main.nf.test @@ -0,0 +1,69 @@ +nextflow_process { + + name "Test Process GCTA_MAKEBKSPARSE" + script "../main.nf" + process "GCTA_MAKEBKSPARSE" + + tag "modules" + tag "modules_nfcore" + tag "gcta" + tag "gcta/makebksparse" + + test("homo_sapiens popgen - 
create sparse GRM") { + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:"plink_simulated" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.id", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.bin", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.N.bin", checkIfExists: true) + ] + input[1] = 0.05 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.sparse_grm_files.size() == 1 }, + { assert process.out.sparse_grm_files.get(0).get(0).id == "plink_simulated" }, + { + assert snapshot( + process.out.sparse_grm_files, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - create sparse GRM - stub") { + options "-stub" + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:"plink_simulated" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.id", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.bin", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.N.bin", checkIfExists: true) + ] + input[1] = 0.05 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap new file mode 100644 index 000000000000..6db0c0f9ab7e --- /dev/null +++ b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "homo_sapiens popgen - create sparse GRM": { + "content": [ + [ + [ + { + "id": "plink_simulated" + }, + 
"plink_simulated_sp.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9", + "plink_simulated_sp.grm.sp:md5,1b78fe4b14c8690943d7687dd22ba85a" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_MAKEBKSPARSE", + "gcta", + "*******************************************************************" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T14:20:58.183672595" + }, + "homo_sapiens popgen - create sparse GRM - stub": { + "content": [ + { + "0": [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_sp.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_sp.grm.sp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "GCTA_MAKEBKSPARSE", + "gcta", + "*******************************************************************" + ] + ], + "sparse_grm_files": [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_sp.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_sp.grm.sp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_gcta": [ + [ + "GCTA_MAKEBKSPARSE", + "gcta", + "*******************************************************************" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T14:14:16.97479623" + } +} \ No newline at end of file diff --git a/modules/nf-core/gcta/makebksparse/tests/nextflow.config b/modules/nf-core/gcta/makebksparse/tests/nextflow.config new file mode 100644 index 000000000000..de31e0218829 --- /dev/null +++ b/modules/nf-core/gcta/makebksparse/tests/nextflow.config @@ -0,0 +1,3 @@ +params { + modules_testdata_base_path = System.getenv("NF_MODULES_TESTDATA_BASE_PATH") ?: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/" +} diff --git a/modules/nf-core/gcta/makegrmpart/environment.yml b/modules/nf-core/gcta/makegrmpart/environment.yml new file mode 100644 index 000000000000..3e22ea7b9f20 --- /dev/null +++ b/modules/nf-core/gcta/makegrmpart/environment.yml @@ -0,0 +1,7 @@ +--- +# 
yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gcta=1.94.1
diff --git a/modules/nf-core/gcta/makegrmpart/main.nf b/modules/nf-core/gcta/makegrmpart/main.nf
new file mode 100644
index 000000000000..6a2552c8a5a2
--- /dev/null
+++ b/modules/nf-core/gcta/makegrmpart/main.nf
@@ -0,0 +1,58 @@
+// Computes partition `meta.part_gcta_job` of `meta.nparts_gcta` of a GRM with
+// `--make-grm-part`, reading genotypes through a multi-file manifest. A MAF
+// filter of 0.01 is applied unless `task.ext.args` supplies its own `--maf`.
+process GCTA_MAKEGRMPART {
+    tag "part ${meta.part_gcta_job} of ${meta.nparts_gcta} (${meta.id})"
+    label 'process_medium'
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/gcta:1.94.1--h9ee0642_0' :
+        'biocontainers/gcta:1.94.1--h9ee0642_0' }"
+
+    input:
+    // Manifest for --mbfile/--mpfile plus the genotype files it references
+    tuple val(meta), path(mfile), path(bed_pgen), path(bim_pvar), path(fam_psam)
+    // Optional SNP list for --extract; pass `[]` as the path to skip it
+    tuple val(meta2), path(snp_group_file)
+
+    output:
+    tuple val(meta), path("${meta.id}.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.id"), path("${meta.id}.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.bin"), path("${meta.id}.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.N.bin"), emit: grm_files
+    // NOTE(review): snapshots of the sibling GCTA modules record a row of
+    // asterisks for this same command, so `head -n 1` presumably captures the
+    // banner border rather than a version number - confirm before relying on it.
+    tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | head -n 1"), emit: versions_gcta, topic: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def part_gcta_job = meta.part_gcta_job
+    def nparts_gcta = meta.nparts_gcta
+    def extract_cmd = snp_group_file ? "--extract ${snp_group_file}" : ''
+    def extra_args = task.ext.args ?: ''
+    def genotype_files = bed_pgen instanceof List ? bed_pgen : [bed_pgen]
+    def genotype_extension = genotype_files[0].name.tokenize('.').last()
+    def multi_file_flag = genotype_extension == 'pgen' ? '--mpfile' : '--mbfile'
+    // Keep 0.01 as the default MAF filter, but let ext.args override it
+    // instead of emitting `--maf` twice on the command line.
+    def maf_arg = extra_args.toString().tokenize().contains('--maf') ? '' : '--maf 0.01'
+
+    """
+    set -euo pipefail
+
+    gcta \\
+        ${multi_file_flag} ${mfile} \\
+        --make-grm-part ${nparts_gcta} ${part_gcta_job} \\
+        ${extract_cmd} \\
+        ${maf_arg} \\
+        --thread-num ${task.cpus} \\
+        --out ${meta.id} ${extra_args}
+    """
+
+    stub:
+    """
+    touch ${meta.id}.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.id
+    touch ${meta.id}.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.bin
+    touch ${meta.id}.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.N.bin
+    """
+}
diff --git a/modules/nf-core/gcta/makegrmpart/meta.yml b/modules/nf-core/gcta/makegrmpart/meta.yml new file mode 100644 index 000000000000..fbcf0aef6c4d --- /dev/null +++ b/modules/nf-core/gcta/makegrmpart/meta.yml @@ -0,0 +1,100 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "gcta_makegrmpart" +description: Compute one partition of a GCTA genetic relationship matrix +keywords: + - gcta + - grm + - genetics +tools: + - "gcta": + description: "GCTA is a tool for genome-wide complex trait analysis." + homepage: "https://yanglab.westlake.edu.cn/software/gcta/" + documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf" + tool_dev_url: "https://github.com/jianyangqt/gcta" + licence: ["GPL-3.0-only"] +input: + - - meta: + type: map + description: | + Groovy Map containing GRM-partition job metadata + e.g.
`[ id:'gcta_grm', part_gcta_job:1, nparts_gcta:2 ]` + - mfile: + type: file + description: GCTA multi-input manifest consumed by `--mbfile` or `--mpfile` + pattern: "*.{mbfile,mpfile,txt}" + ontologies: + - edam: "http://edamontology.org/format_2330" + - bed_pgen: + type: file + description: Collection of PLINK primary genotype files referenced by the multi-input manifest + pattern: "*.{bed,pgen}" + ontologies: + - edam: "http://edamontology.org/format_3003" + - bim_pvar: + type: file + description: Collection of PLINK variant metadata files referenced by the multi-input manifest + pattern: "*.{bim,pvar}" + ontologies: [] + - fam_psam: + type: file + description: Collection of PLINK sample metadata files referenced by the multi-input manifest + pattern: "*.{fam,psam}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing SNP-selection metadata + e.g. `[ id:'snp_group1', snp_group:1 ]` + - snp_group_file: + type: file + description: Optional SNP extraction file passed to `--extract`; provide `[]` when absent + pattern: "*.{txt,list}" + ontologies: + - edam: "http://edamontology.org/format_2330" +output: + grm_files: + - - meta: + type: map + description: | + Groovy Map containing GRM-partition job metadata + e.g. 
`[ id:'gcta_grm', part_gcta_job:1, nparts_gcta:2 ]`
+      - "${meta.id}.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.id":
+          type: file
+          description: Partitioned GRM ID file
+          pattern: "${meta.id}.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.id"
+          ontologies: []
+      - "${meta.id}.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.bin":
+          type: file
+          description: Partitioned GRM binary matrix file
+          pattern: "${meta.id}.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.bin"
+          ontologies: []
+      - "${meta.id}.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.N.bin":
+          type: file
+          description: Partitioned GRM sample-count matrix file
+          pattern: "${meta.id}.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.N.bin"
+          ontologies: []
+  versions_gcta:
+    - - "${task.process}":
+          type: string
+          description: The process the versions were collected from
+      - "gcta":
+          type: string
+          description: The tool name
+      - "gcta --version 2>&1 | head -n 1":
+          type: eval
+          description: The command used to generate the version of the tool
+topics:
+  versions:
+    - - ${task.process}:
+          type: string
+          description: The process the versions were collected from
+      - gcta:
+          type: string
+          description: The tool name
+      - gcta --version 2>&1 | head -n 1:
+          type: eval
+          description: The command used to generate the version of the tool
+authors:
+  - "@andongni"
+maintainers:
+  - "@andongni"
diff --git a/modules/nf-core/gcta/makegrmpart/tests/main.nf.test b/modules/nf-core/gcta/makegrmpart/tests/main.nf.test
new file mode 100644
index 000000000000..e8bf773bdf82
--- /dev/null
+++ b/modules/nf-core/gcta/makegrmpart/tests/main.nf.test
@@ -0,0 +1,87 @@
+nextflow_process { // nf-test suite for GCTA_MAKEGRMPART: one real run on PLINK2 inputs, one stub run on PLINK1 inputs
+
+    name "Test Process GCTA_MAKEGRMPART"
+    script "../main.nf"
+    process "GCTA_MAKEGRMPART"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "gcta"
+    tag "gcta/makegrmpart"
+
+    test("homo_sapiens popgen - plink2") { // real run: part 1 of 2, manifest written on the fly, no SNP extraction file
+        config "./nextflow.config"
+
+        when {
+            process {
+                """
+                file('gcta_grm.mpfile').text = 'plink_simulated plink_simulated.pgen plink_simulated.psam plink_simulated.pvar\\n'
+
+                input[0] = [
+                    [ id:'gcta_grm', part_gcta_job:1, nparts_gcta:2 ],
+                    file('gcta_grm.mpfile'),
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pgen', checkIfExists: true)
+                    ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pvar', checkIfExists: true)
+                    ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.psam', checkIfExists: true)
+                    ]
+                ]
+                input[1] = [[ id:'snp_group0' ], []]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert process.out.grm_files.size() == 1 }, // exactly one GRM tuple is emitted
+                { assert process.out.grm_files.get(0).get(0).id == 'gcta_grm' }, // emitted meta carries the input id
+                {
+                    assert snapshot(
+                        process.out.grm_files,
+                        process.out.findAll { key, val -> key.startsWith('versions') } // every emit whose name starts with 'versions'
+                    ).match()
+                }
+            )
+        }
+    }
+
+    test("homo_sapiens popgen - plink1 - stub") { // stub run on bed/bim/fam inputs with an mbfile manifest
+        options "-stub"
+        config "./nextflow.config"
+
+        when {
+            process {
+                """
+                file('gcta_grm.mbfile').text = 'plink_simulated\\n'
+
+                input[0] = [
+                    [ id:'gcta_grm_bed', part_gcta_job:1, nparts_gcta:2 ],
+                    file('gcta_grm.mbfile'),
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true)
+                    ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true)
+                    ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true)
+                    ]
+                ]
+                input[1] = [[ id:'snp_group0' ], []]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() } // snapshot covers every output channel of the stub run
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/gcta/makegrmpart/tests/main.nf.test.snap b/modules/nf-core/gcta/makegrmpart/tests/main.nf.test.snap
new file mode 100644
index 000000000000..49ba14aa5f41
--- /dev/null
+++ b/modules/nf-core/gcta/makegrmpart/tests/main.nf.test.snap
@@ -0,0
+1,81 @@ +{ + "homo_sapiens popgen - plink2": { + "content": [ + [ + [ + { + "id": "gcta_grm", + "part_gcta_job": 1, + "nparts_gcta": 2 + }, + "gcta_grm.part_2_1.grm.id:md5,9c193413bbf336213da941abeee78718", + "gcta_grm.part_2_1.grm.bin:md5,b683a1daa96406174c02156527da1f19", + "gcta_grm.part_2_1.grm.N.bin:md5,0dcc3200354c243fca2de4c023352e66" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_MAKEGRMPART", + "gcta", + "*******************************************************************" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T13:29:12.139953008" + }, + "homo_sapiens popgen - plink1 - stub": { + "content": [ + { + "0": [ + [ + { + "id": "gcta_grm_bed", + "part_gcta_job": 1, + "nparts_gcta": 2 + }, + "gcta_grm_bed.part_2_1.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "gcta_grm_bed.part_2_1.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "gcta_grm_bed.part_2_1.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "GCTA_MAKEGRMPART", + "gcta", + "*******************************************************************" + ] + ], + "grm_files": [ + [ + { + "id": "gcta_grm_bed", + "part_gcta_job": 1, + "nparts_gcta": 2 + }, + "gcta_grm_bed.part_2_1.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "gcta_grm_bed.part_2_1.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "gcta_grm_bed.part_2_1.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_gcta": [ + [ + "GCTA_MAKEGRMPART", + "gcta", + "*******************************************************************" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T13:29:17.993546066" + } +} \ No newline at end of file diff --git a/modules/nf-core/gcta/makegrmpart/tests/nextflow.config b/modules/nf-core/gcta/makegrmpart/tests/nextflow.config new file mode 100644 index 000000000000..76b9ab148074 --- /dev/null +++ b/modules/nf-core/gcta/makegrmpart/tests/nextflow.config @@ -0,0 +1,3 @@ 
+params {
+    modules_testdata_base_path = System.getenv('NF_MODULES_TESTDATA_BASE_PATH') ?: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/'
+}
diff --git a/modules/nf-core/gcta/reml/environment.yml b/modules/nf-core/gcta/reml/environment.yml
new file mode 100644
index 000000000000..3e22ea7b9f20
--- /dev/null
+++ b/modules/nf-core/gcta/reml/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::gcta=1.94.1
diff --git a/modules/nf-core/gcta/reml/main.nf b/modules/nf-core/gcta/reml/main.nf
new file mode 100644
index 000000000000..f8c7b6fccb86
--- /dev/null
+++ b/modules/nf-core/gcta/reml/main.nf
@@ -0,0 +1,53 @@
+// Runs `gcta --reml` for one phenotype file against one dense GRM and emits `${meta.id}.hsq`.
+// Both covariate inputs are optional: pass `[]` as the file to leave out `--qcovar` / `--covar`.
+process GCTA_REML {
+    tag "gcta_reml_${meta.id}_${meta2.id}"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/gcta:1.94.1--h9ee0642_0' :
+        'biocontainers/gcta:1.94.1--h9ee0642_0' }"
+
+    input:
+    tuple val(meta), path(phenotypes_file)                            // phenotype, passed to --pheno
+    tuple val(meta2), path(grm_id), path(grm_bin), path(grm_n_bin)    // the three files of one dense GRM
+    tuple val(meta3), path(quant_covariates_file)                     // optional, passed to --qcovar
+    tuple val(meta4), path(cat_covariates_file)                       // optional, passed to --covar
+
+    output:
+    tuple val(meta), path("${meta.id}.hsq"), emit: reml_results
+    // NOTE(review): the committed nf-test snapshots record this value as a row of asterisks, which looks
+    // like the first line of GCTA's banner rather than a version number - confirm and parse the version line.
+    tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | head -n 1"), emit: versions_gcta, topic: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // `--grm` expects the common file-name prefix of the GRM files. Derive it from the staged
+    // `.grm.id` file so the command does not depend on `meta2.id` matching the input file names.
+    def grm_prefix = grm_id.name.replaceFirst(/\.grm\.id$/, '')
+    // An empty list (`[]`) is falsy, so an absent covariate file drops its flag entirely.
+    def qcovar_param = quant_covariates_file ? "--qcovar ${quant_covariates_file}" : ''
+    def covar_param = cat_covariates_file ? "--covar ${cat_covariates_file}" : ''
+    def extra_args = task.ext.args ?: ''
+
+    """
+    set -euo pipefail
+
+    gcta \\
+        --reml \\
+        --grm "${grm_prefix}" \\
+        --pheno ${phenotypes_file} \\
+        ${qcovar_param} \\
+        ${covar_param} \\
+        --out "${meta.id}" \\
+        --thread-num ${task.cpus} ${extra_args}
+    """
+
+    stub:
+    """
+    touch "${meta.id}.hsq"
+    """
+}
diff --git a/modules/nf-core/gcta/reml/meta.yml b/modules/nf-core/gcta/reml/meta.yml
new file mode 100644
index 000000000000..3f3fb01cf4fc
--- /dev/null
+++ b/modules/nf-core/gcta/reml/meta.yml
@@ -0,0 +1,105 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "gcta_reml"
+description: Run univariate REML heritability estimation with a dense GRM
+keywords:
+  - gcta
+  - reml
+  - genetics
+tools:
+  - "gcta":
+      description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data."
+      homepage: "https://yanglab.westlake.edu.cn/software/gcta/"
+      documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf"
+      tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/"
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy map containing phenotype metadata
+          e.g. `[ id:'QuantitativeTrait' ]`
+    - phenotypes_file:
+        type: file
+        description: Phenotype file passed to `--pheno`
+        pattern: "*.{phe,pheno,txt,tsv}"
+        ontologies:
+          - edam: "http://edamontology.org/format_3475"
+  - - meta2:
+        type: map
+        description: |
+          Groovy map containing dense GRM metadata
+          e.g.
`[ id:'plink_simulated' ]` + - grm_id: + type: file + description: Dense GRM sample identifier file + pattern: "*.grm.id" + ontologies: [] + - grm_bin: + type: file + description: Dense GRM binary matrix file + pattern: "*.grm.bin" + ontologies: [] + - grm_n_bin: + type: file + description: Dense GRM sample-count matrix file + pattern: "*.grm.N.bin" + ontologies: [] + - - meta3: + type: map + description: | + Groovy map containing quantitative covariate metadata + e.g. `[ id:'covariates_quant' ]` + - quant_covariates_file: + type: file + description: Quantitative covariates file, pass `[]` when absent + pattern: "*.{covar,cov,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" + - - meta4: + type: map + description: | + Groovy map containing categorical covariate metadata + e.g. `[ id:'covariates_cat' ]` + - cat_covariates_file: + type: file + description: Categorical covariates file, pass `[]` when absent + pattern: "*.{covar,cov,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" +output: + reml_results: + - - meta: + type: map + description: | + Groovy map containing phenotype metadata + e.g. 
`[ id:'QuantitativeTrait' ]`
+      - "${meta.id}.hsq":
+          type: file
+          description: REML result file
+          pattern: "${meta.id}.hsq"
+          ontologies:
+            - edam: "http://edamontology.org/format_2330"
+  versions_gcta:
+    - - "${task.process}":
+          type: string
+          description: The process the version was collected from
+      - "gcta":
+          type: string
+          description: The tool name
+      - "gcta --version 2>&1 | head -n 1":
+          type: eval
+          description: The command used to retrieve the GCTA version
+topics:
+  versions:
+    - - ${task.process}:
+          type: string
+          description: The process the version was collected from
+      - gcta:
+          type: string
+          description: The tool name
+      - gcta --version 2>&1 | head -n 1:
+          type: eval
+          description: The command used to retrieve the GCTA version
+authors:
+  - "@andongni"
+maintainers:
+  - "@andongni"
diff --git a/modules/nf-core/gcta/reml/tests/main.nf.test b/modules/nf-core/gcta/reml/tests/main.nf.test
new file mode 100644
index 000000000000..b6365f5b15b5
--- /dev/null
+++ b/modules/nf-core/gcta/reml/tests/main.nf.test
@@ -0,0 +1,84 @@
+nextflow_process { // nf-test suite for GCTA_REML: one real run with both covariate files, one stub run without them
+
+    name "Test Process GCTA_REML"
+    script "../main.nf"
+    process "GCTA_REML"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "gcta"
+    tag "gcta/reml"
+
+    test("homo_sapiens popgen - quantitative phenotype with dense GRM and covariates") { // real run; all four inputs carry a file
+        config "./nextflow.config"
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:"QuantitativeTrait" ],
+                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_quantitative.noheader.txt", checkIfExists: true)
+                ]
+                input[1] = [
+                    [ id:"plink_simulated" ],
+                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.id", checkIfExists: true),
+                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.bin", checkIfExists: true),
+                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.N.bin", checkIfExists: true)
+                ]
+                input[2] = [
+                    [ id:"covariates_quant" ],
+                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_covariates.quant.noheader.txt", checkIfExists: true)
+                ]
+                input[3] = [
+                    [ id:"covariates_cat" ],
+                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_covariates.cat.noheader.txt", checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert process.out.reml_results.size() == 1 }, // exactly one result tuple is emitted
+                { assert process.out.reml_results.get(0).get(0).id == "QuantitativeTrait" }, // result carries the phenotype meta id
+                {
+                    assert snapshot(
+                        process.out.reml_results,
+                        process.out.findAll { key, val -> key.startsWith('versions') } // every emit whose name starts with 'versions'
+                    ).match()
+                }
+            )
+        }
+    }
+
+    test("homo_sapiens popgen - quantitative phenotype with dense GRM - stub") { // stub run; both covariate files are given as []
+        options "-stub"
+        config "./nextflow.config"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:"QuantitativeTrait" ],
+                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_quantitative.noheader.txt", checkIfExists: true)
+                ]
+                input[1] = [
+                    [ id:"plink_simulated" ],
+                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.id", checkIfExists: true),
+                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.bin", checkIfExists: true),
+                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.N.bin", checkIfExists: true)
+                ]
+                input[2] = [[ id:"covariates_quant" ], []]
+                input[3] = [[ id:"covariates_cat" ], []]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() } // snapshot covers every output channel of the stub run
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/gcta/reml/tests/main.nf.test.snap b/modules/nf-core/gcta/reml/tests/main.nf.test.snap
new file mode 100644
index 000000000000..01cd4a9f7fb7
--- /dev/null
+++ b/modules/nf-core/gcta/reml/tests/main.nf.test.snap
@@ -0,0 +1,69 @@
+{
+    "homo_sapiens popgen - quantitative phenotype with dense GRM - stub": {
+ "content": [ + { + "0": [ + [ + { + "id": "QuantitativeTrait" + }, + "QuantitativeTrait.hsq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "GCTA_REML", + "gcta", + "*******************************************************************" + ] + ], + "reml_results": [ + [ + { + "id": "QuantitativeTrait" + }, + "QuantitativeTrait.hsq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_gcta": [ + [ + "GCTA_REML", + "gcta", + "*******************************************************************" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T14:14:45.245259136" + }, + "homo_sapiens popgen - quantitative phenotype with dense GRM and covariates": { + "content": [ + [ + [ + { + "id": "QuantitativeTrait" + }, + "QuantitativeTrait.hsq:md5,d52dd6a71f21ec446d5dcaefb15b1d8e" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_REML", + "gcta", + "*******************************************************************" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T14:21:15.797101079" + } +} \ No newline at end of file diff --git a/modules/nf-core/gcta/reml/tests/nextflow.config b/modules/nf-core/gcta/reml/tests/nextflow.config new file mode 100644 index 000000000000..de31e0218829 --- /dev/null +++ b/modules/nf-core/gcta/reml/tests/nextflow.config @@ -0,0 +1,3 @@ +params { + modules_testdata_base_path = System.getenv("NF_MODULES_TESTDATA_BASE_PATH") ?: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/" +} diff --git a/modules/nf-core/gcta/remlldms/environment.yml b/modules/nf-core/gcta/remlldms/environment.yml new file mode 100644 index 000000000000..3e22ea7b9f20 --- /dev/null +++ b/modules/nf-core/gcta/remlldms/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - 
bioconda::gcta=1.94.1
diff --git a/modules/nf-core/gcta/remlldms/main.nf b/modules/nf-core/gcta/remlldms/main.nf
new file mode 100644
index 000000000000..0661493da918
--- /dev/null
+++ b/modules/nf-core/gcta/remlldms/main.nf
@@ -0,0 +1,54 @@
+// Runs `gcta --reml-no-constrain --mgrm` for one phenotype file and emits `${meta.id}.hsq`.
+// `grm_files` is staged next to the manifest; it is not named on the command line.
+process GCTA_REMLLDMS {
+    tag "gcta_reml_ldms_${meta.id}_${meta2.id}"
+    label 'process_medium'
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/gcta:1.94.1--h9ee0642_0' :
+        'biocontainers/gcta:1.94.1--h9ee0642_0' }"
+
+    input:
+    tuple val(meta), path(phenotypes_file)
+    tuple val(meta2), path(mgrm_file), path(grm_files)
+    tuple val(meta3), path(quant_covariates_file)
+    tuple val(meta4), path(cat_covariates_file)
+
+    output:
+    tuple val(meta), path("${meta.id}.hsq"), emit: reml_results
+    tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | head -n 1"), emit: versions_gcta, topic: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def extra_args = task.ext.args ?: ''
+
+    // Each covariate flag stays empty unless its file was supplied (an empty list is falsy).
+    def qcovar_param = ''
+    if (quant_covariates_file) {
+        qcovar_param = "--qcovar ${quant_covariates_file}"
+    }
+    def covar_param = ''
+    if (cat_covariates_file) {
+        covar_param = "--covar ${cat_covariates_file}"
+    }
+
+    """
+    set -euo pipefail
+
+    gcta \\
+        --reml-no-constrain \\
+        --mgrm ${mgrm_file} \\
+        --pheno ${phenotypes_file} \\
+        ${qcovar_param} \\
+        ${covar_param} \\
+        --out "${meta.id}" \\
+        --thread-num ${task.cpus} ${extra_args}
+    """
+
+    stub:
+    """
+    touch "${meta.id}.hsq"
+    """
+}
diff --git a/modules/nf-core/gcta/remlldms/meta.yml b/modules/nf-core/gcta/remlldms/meta.yml
new file mode 100644
index 000000000000..438e958632a4
--- /dev/null
+++ b/modules/nf-core/gcta/remlldms/meta.yml
@@ -0,0 +1,102 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "gcta_remlldms"
+description: Run REML-LDMS heritability estimation with an MGRM manifest
+keywords:
+  - gcta
+  - reml
+  - ldms
+  - genetics
+tools:
+  - "gcta":
+      description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data."
+      homepage: "https://yanglab.westlake.edu.cn/software/gcta/"
+      documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf"
+      tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/"
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy map containing phenotype metadata
+          e.g. `[ id:'QuantitativeTrait' ]`
+    - phenotypes_file:
+        type: file
+        description: Phenotype file passed to `--pheno`
+        pattern: "*.{phe,pheno,txt,tsv}"
+        ontologies:
+          - edam: "http://edamontology.org/format_3475"
+  - - meta2:
+        type: map
+        description: |
+          Groovy map containing MGRM metadata
+          e.g.
`[ id:'plink_simulated_ldms' ]` + - mgrm_file: + type: file + description: MGRM manifest file + pattern: "*.mgrm" + ontologies: + - edam: "http://edamontology.org/format_2330" + - grm_files: + type: file + description: GRM sidecar files referenced by `mgrm_file` + pattern: "*" + ontologies: [] + - - meta3: + type: map + description: | + Groovy map containing quantitative covariate metadata + e.g. `[ id:'covariates_quant' ]` + - quant_covariates_file: + type: file + description: Quantitative covariates file, pass `[]` when absent + pattern: "*.{covar,cov,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" + - - meta4: + type: map + description: | + Groovy map containing categorical covariate metadata + e.g. `[ id:'covariates_cat' ]` + - cat_covariates_file: + type: file + description: Categorical covariates file, pass `[]` when absent + pattern: "*.{covar,cov,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" +output: + reml_results: + - - meta: + type: map + description: | + Groovy map containing phenotype metadata + e.g. 
`[ id:'QuantitativeTrait' ]`
+      - "${meta.id}.hsq":
+          type: file
+          description: REML-LDMS result file
+          pattern: "${meta.id}.hsq"
+          ontologies:
+            - edam: "http://edamontology.org/format_2330"
+  versions_gcta:
+    - - "${task.process}":
+          type: string
+          description: The process the version was collected from
+      - "gcta":
+          type: string
+          description: The tool name
+      - "gcta --version 2>&1 | head -n 1":
+          type: eval
+          description: The command used to retrieve the GCTA version
+topics:
+  versions:
+    - - ${task.process}:
+          type: string
+          description: The process the version was collected from
+      - gcta:
+          type: string
+          description: The tool name
+      - gcta --version 2>&1 | head -n 1:
+          type: eval
+          description: The command used to retrieve the GCTA version
+authors:
+  - "@andongni"
+maintainers:
+  - "@andongni"
diff --git a/modules/nf-core/gcta/remlldms/tests/main.nf.test b/modules/nf-core/gcta/remlldms/tests/main.nf.test
new file mode 100644
index 000000000000..4efd400ef0b4
--- /dev/null
+++ b/modules/nf-core/gcta/remlldms/tests/main.nf.test
@@ -0,0 +1,96 @@
+nextflow_process { // nf-test suite for GCTA_REMLLDMS: one real run with both covariate files, one stub run without them
+
+    name "Test Process GCTA_REMLLDMS"
+    script "../main.nf"
+    process "GCTA_REMLLDMS"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "gcta"
+    tag "gcta/remlldms"
+
+    test("homo_sapiens popgen - quantitative phenotype with ldms mgrm and covariates") { // real run; manifest plus the files of two GRMs
+        config "./nextflow.config"
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:"QuantitativeTrait" ],
+                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_quantitative.noheader.txt", checkIfExists: true)
+                ]
+                input[1] = [
+                    [ id:"plink_simulated_ldms" ],
+                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms.mgrm", checkIfExists: true),
+                    [
+                        file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms1.grm.id", checkIfExists: true),
+                        file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms1.grm.bin", checkIfExists: true),
+                        file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms1.grm.N.bin", checkIfExists: true),
+                        file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms2.grm.id", checkIfExists: true),
+                        file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms2.grm.bin", checkIfExists: true),
+                        file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms2.grm.N.bin", checkIfExists: true)
+                    ]
+                ]
+                input[2] = [
+                    [ id:"covariates_quant" ],
+                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_covariates.quant.noheader.txt", checkIfExists: true)
+                ]
+                input[3] = [
+                    [ id:"covariates_cat" ],
+                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_covariates.cat.noheader.txt", checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert process.out.reml_results.size() == 1 }, // exactly one result tuple is emitted
+                { assert process.out.reml_results.get(0).get(0).id == "QuantitativeTrait" }, // result carries the phenotype meta id
+                {
+                    assert snapshot(
+                        process.out.reml_results,
+                        process.out.findAll { key, val -> key.startsWith('versions') } // every emit whose name starts with 'versions'
+                    ).match()
+                }
+            )
+        }
+    }
+
+    test("homo_sapiens popgen - quantitative phenotype with ldms mgrm - stub") { // stub run; both covariate files are given as []
+        options "-stub"
+        config "./nextflow.config"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:"QuantitativeTrait" ],
+                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_quantitative.noheader.txt", checkIfExists: true)
+                ]
+                input[1] = [
+                    [ id:"plink_simulated_ldms" ],
+                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms.mgrm", checkIfExists: true),
+                    [
+                        file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms1.grm.id", checkIfExists: true),
+                        file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms1.grm.bin", checkIfExists: true),
+                        file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms1.grm.N.bin", checkIfExists: true),
+                        file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms2.grm.id", checkIfExists: true),
+                        file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms2.grm.bin", checkIfExists: true),
+                        file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms2.grm.N.bin", checkIfExists: true)
+                    ]
+                ]
+                input[2] = [[ id:"covariates_quant" ], []]
+                input[3] = [[ id:"covariates_cat" ], []]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() } // snapshot covers every output channel of the stub run
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/gcta/remlldms/tests/main.nf.test.snap b/modules/nf-core/gcta/remlldms/tests/main.nf.test.snap
new file mode 100644
index 000000000000..eb76250540e4
--- /dev/null
+++ b/modules/nf-core/gcta/remlldms/tests/main.nf.test.snap
@@ -0,0 +1,69 @@
+{
+    "homo_sapiens popgen - quantitative phenotype with ldms mgrm and covariates": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "QuantitativeTrait"
+                    },
+                    "QuantitativeTrait.hsq:md5,fd2cbee5b278ec127dbbca43abee5810"
+                ]
+            ],
+            {
+                "versions_gcta": [
+                    [
+                        "GCTA_REMLLDMS",
+                        "gcta",
+                        "*******************************************************************"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.4"
+        },
+        "timestamp": "2026-03-13T14:21:30.18552419"
+    },
+    "homo_sapiens popgen - quantitative phenotype with ldms mgrm - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "QuantitativeTrait"
+                        },
+                        "QuantitativeTrait.hsq:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    [
+                        "GCTA_REMLLDMS",
+                        "gcta",
+                        "*******************************************************************"
+                    ]
+                ],
+                "reml_results": [
+                    [
+                        {
+                            "id": "QuantitativeTrait"
+                        },
+
"QuantitativeTrait.hsq:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions_gcta": [
+                    [
+                        "GCTA_REMLLDMS",
+                        "gcta",
+                        "*******************************************************************"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.4"
+        },
+        "timestamp": "2026-03-13T14:14:59.682890873"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/gcta/remlldms/tests/nextflow.config b/modules/nf-core/gcta/remlldms/tests/nextflow.config
new file mode 100644
index 000000000000..de31e0218829
--- /dev/null
+++ b/modules/nf-core/gcta/remlldms/tests/nextflow.config
@@ -0,0 +1,3 @@
+params {
+    modules_testdata_base_path = System.getenv("NF_MODULES_TESTDATA_BASE_PATH") ?: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/"
+}
diff --git a/modules/nf-core/gcta/removerelatedsubjects/environment.yml b/modules/nf-core/gcta/removerelatedsubjects/environment.yml
new file mode 100644
index 000000000000..3e22ea7b9f20
--- /dev/null
+++ b/modules/nf-core/gcta/removerelatedsubjects/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::gcta=1.94.1
diff --git a/modules/nf-core/gcta/removerelatedsubjects/main.nf b/modules/nf-core/gcta/removerelatedsubjects/main.nf
new file mode 100644
index 000000000000..cf81faeab04b
--- /dev/null
+++ b/modules/nf-core/gcta/removerelatedsubjects/main.nf
@@ -0,0 +1,47 @@
+// Filters a dense GRM with `gcta --grm-cutoff 0.05 --make-grm` and writes the result
+// under the `${meta.id}_unrel05` prefix.
+process GCTA_REMOVERELATEDSUBJECTS {
+    tag "${meta.id}"
+    label 'process_medium'
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/gcta:1.94.1--h9ee0642_0' :
+        'biocontainers/gcta:1.94.1--h9ee0642_0' }"
+
+    input:
+    tuple val(meta), path(grm_id), path(grm_bin), path(grm_n_bin)
+
+    output:
+    tuple val(meta), path("${meta.id}_unrel05.grm.id"), path("${meta.id}_unrel05.grm.bin"), path("${meta.id}_unrel05.grm.N.bin"), emit: grm_files
+    // Same `.grm.id` file as in `grm_files`, emitted on its own channel.
+    tuple val(meta), path("${meta.id}_unrel05.grm.id"), emit: keep_file
+    // NOTE(review): the committed nf-test snapshots record this value as a row of asterisks, which looks
+    // like the first line of GCTA's banner rather than a version number - confirm and parse the version line.
+    tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | head -n 1"), emit: versions_gcta, topic: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    // `--grm` expects the common file-name prefix of the GRM files. Derive it from the staged
+    // `.grm.id` file so the command does not depend on `meta.id` matching the input file names.
+    def grm_prefix = grm_id.name.replaceFirst(/\.grm\.id$/, '')
+
+    """
+    gcta \\
+        --grm ${grm_prefix} \\
+        --grm-cutoff 0.05 \\
+        --make-grm \\
+        --out ${meta.id}_unrel05 \\
+        --thread-num ${task.cpus} \\
+        ${args}
+    """
+
+    stub:
+    """
+    touch ${meta.id}_unrel05.grm.id
+    touch ${meta.id}_unrel05.grm.bin
+    touch ${meta.id}_unrel05.grm.N.bin
+    """
+}
diff --git a/modules/nf-core/gcta/removerelatedsubjects/meta.yml b/modules/nf-core/gcta/removerelatedsubjects/meta.yml
new file mode 100644
index 000000000000..d7864252d034
--- /dev/null
+++ b/modules/nf-core/gcta/removerelatedsubjects/meta.yml
@@ -0,0 +1,96 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "gcta_removerelatedsubjects"
+description: Remove related individuals from a dense GRM using `gcta --grm-cutoff`
+keywords:
+  - gcta
+  - grm
+  - genetics
+tools:
+  - "gcta":
+      description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data."
+      homepage: "https://yanglab.westlake.edu.cn/software/gcta/"
+      documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf"
+      tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/"
+
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy map containing dense GRM metadata
+          e.g.
`[ id:'plink_simulated' ]` + - grm_id: + type: file + description: Dense GRM sample identifier file + pattern: "*.grm.id" + ontologies: [] + - grm_bin: + type: file + description: Dense GRM binary matrix file + pattern: "*.grm.bin" + ontologies: [] + - grm_n_bin: + type: file + description: Dense GRM sample-count matrix file + pattern: "*.grm.N.bin" + ontologies: [] + +output: + grm_files: + - - meta: + type: map + description: | + Groovy map containing dense GRM metadata + e.g. `[ id:'plink_simulated' ]` + - "${meta.id}_unrel05.grm.id": + type: file + description: Relatedness-filtered GRM sample identifier file + pattern: "${meta.id}_unrel05.grm.id" + ontologies: [] + - "${meta.id}_unrel05.grm.bin": + type: file + description: Relatedness-filtered GRM binary matrix file + pattern: "${meta.id}_unrel05.grm.bin" + ontologies: [] + - "${meta.id}_unrel05.grm.N.bin": + type: file + description: Relatedness-filtered GRM sample-count matrix file + pattern: "${meta.id}_unrel05.grm.N.bin" + ontologies: [] + keep_file: + - - meta: + type: map + description: | + Groovy map containing dense GRM metadata + e.g. 
`[ id:'plink_simulated' ]`
+      - "${meta.id}_unrel05.grm.id":
+          type: file
+          description: Keep file of unrelated individuals emitted by GCTA
+          pattern: "${meta.id}_unrel05.grm.id"
+          ontologies: []
+  versions_gcta:
+    - - "${task.process}":
+          type: string
+          description: The process the version was collected from
+      - "gcta":
+          type: string
+          description: The tool name
+      - "gcta --version 2>&1 | head -n 1":
+          type: eval
+          description: The command used to retrieve the GCTA version
+
+topics:
+  versions:
+    - - ${task.process}:
+          type: string
+          description: The process the version was collected from
+      - gcta:
+          type: string
+          description: The tool name
+      - gcta --version 2>&1 | head -n 1:
+          type: eval
+          description: The command used to retrieve the GCTA version
+
+authors:
+  - "@andongni"
+maintainers:
+  - "@andongni"
diff --git a/modules/nf-core/gcta/removerelatedsubjects/tests/main.nf.test b/modules/nf-core/gcta/removerelatedsubjects/tests/main.nf.test
new file mode 100644
index 000000000000..52ad6ef3cd09
--- /dev/null
+++ b/modules/nf-core/gcta/removerelatedsubjects/tests/main.nf.test
@@ -0,0 +1,69 @@
+nextflow_process { // nf-test suite for GCTA_REMOVERELATEDSUBJECTS: one real run and one stub run on the same dense GRM
+
+    name "Test Process GCTA_REMOVERELATEDSUBJECTS"
+    script "../main.nf"
+    process "GCTA_REMOVERELATEDSUBJECTS"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "gcta"
+    tag "gcta/removerelatedsubjects"
+
+    test("homo_sapiens popgen - remove related individuals from dense GRM") { // real run on the plink_simulated GRM
+        config "./nextflow.config"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:"plink_simulated" ],
+                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.id", checkIfExists: true),
+                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.bin", checkIfExists: true),
+                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.N.bin", checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert process.out.grm_files.size() == 1 }, // exactly one filtered-GRM tuple is emitted
+                { assert process.out.keep_file.size() == 1 }, // exactly one keep-file tuple is emitted
+                { assert process.out.grm_files.get(0).get(0).id == "plink_simulated" }, // emitted meta carries the input id
+                {
+                    assert snapshot(
+                        process.out.grm_files,
+                        process.out.keep_file,
+                        process.out.findAll { key, val -> key.startsWith('versions') } // every emit whose name starts with 'versions'
+                    ).match()
+                }
+            )
+        }
+    }
+
+    test("homo_sapiens popgen - remove related individuals from dense GRM - stub") { // stub run with the same inputs
+        options "-stub"
+        config "./nextflow.config"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:"plink_simulated" ],
+                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.id", checkIfExists: true),
+                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.bin", checkIfExists: true),
+                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.N.bin", checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() } // snapshot covers every output channel of the stub run
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/gcta/removerelatedsubjects/tests/main.nf.test.snap b/modules/nf-core/gcta/removerelatedsubjects/tests/main.nf.test.snap
new file mode 100644
index 000000000000..6b0831a1c210
--- /dev/null
+++ b/modules/nf-core/gcta/removerelatedsubjects/tests/main.nf.test.snap
@@ -0,0 +1,99 @@
+{
+    "homo_sapiens popgen - remove related individuals from dense GRM": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "plink_simulated"
+                    },
+                    "plink_simulated_unrel05.grm.id:md5,ca8c0bded6951fdd3bf0dddc97b6df6b",
+                    "plink_simulated_unrel05.grm.bin:md5,b1f124463eecbae86840a6651eec372d",
+                    "plink_simulated_unrel05.grm.N.bin:md5,06b73ea8bae8f1e5f5d4de33dbd2c75e"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "plink_simulated"
+                    },
+                    "plink_simulated_unrel05.grm.id:md5,ca8c0bded6951fdd3bf0dddc97b6df6b"
+                ]
+            ],
+            {
+                "versions_gcta": [
+                    [
+                        "GCTA_REMOVERELATEDSUBJECTS",
+                        "gcta",
+                        "*******************************************************************"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.4"
+        },
+
"timestamp": "2026-03-13T14:21:44.229406402" + }, + "homo_sapiens popgen - remove related individuals from dense GRM - stub": { + "content": [ + { + "0": [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_unrel05.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_unrel05.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_unrel05.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_unrel05.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + "GCTA_REMOVERELATEDSUBJECTS", + "gcta", + "*******************************************************************" + ] + ], + "grm_files": [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_unrel05.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_unrel05.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_unrel05.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "keep_file": [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_unrel05.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_gcta": [ + [ + "GCTA_REMOVERELATEDSUBJECTS", + "gcta", + "*******************************************************************" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T14:15:17.5409981" + } +} \ No newline at end of file diff --git a/modules/nf-core/gcta/removerelatedsubjects/tests/nextflow.config b/modules/nf-core/gcta/removerelatedsubjects/tests/nextflow.config new file mode 100644 index 000000000000..de31e0218829 --- /dev/null +++ b/modules/nf-core/gcta/removerelatedsubjects/tests/nextflow.config @@ -0,0 +1,3 @@ +params { + modules_testdata_base_path = System.getenv("NF_MODULES_TESTDATA_BASE_PATH") ?: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/" +} From d84b94ff987ab810d9cca7b9b0b7ff1d0b392dd9 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Fri, 13 Mar 2026 16:30:51 
+0800 Subject: [PATCH 2/9] test: reuse shared datasets in gcta module tests --- .../nf-core/gcta/addgrms/tests/main.nf.test | 120 ++++++++--- .../gcta/addgrms/tests/main.nf.test.snap | 4 +- .../nf-core/gcta/adjustgrm/tests/main.nf.test | 53 +++-- .../gcta/adjustgrm/tests/main.nf.test.snap | 28 +-- .../gcta/bivariatereml/tests/main.nf.test | 117 ++++++++--- .../bivariatereml/tests/main.nf.test.snap | 4 +- .../gcta/bivariateremlldms/tests/main.nf.test | 177 +++++++++++++--- .../bivariateremlldms/tests/main.nf.test.snap | 4 +- .../nf-core/gcta/calculateldscores/main.nf | 18 +- .../gcta/calculateldscores/tests/main.nf.test | 26 +-- .../calculateldscores/tests/main.nf.test.snap | 95 ++++----- .../calculateldscores/tests/nextflow.config | 7 + .../nf-core/gcta/fastgwa/tests/main.nf.test | 130 +++++++++--- .../gcta/fastgwa/tests/main.nf.test.snap | 4 +- .../gcta/filtergrmwithkeep/tests/main.nf.test | 63 ++++-- .../filtergrmwithkeep/tests/main.nf.test.snap | 28 +-- .../gcta/makebksparse/tests/main.nf.test | 57 ++++-- .../gcta/makebksparse/tests/main.nf.test.snap | 22 +- modules/nf-core/gcta/reml/tests/main.nf.test | 114 ++++++++--- .../nf-core/gcta/reml/tests/main.nf.test.snap | 4 +- .../nf-core/gcta/remlldms/tests/main.nf.test | 192 ++++++++++++++---- .../gcta/remlldms/tests/main.nf.test.snap | 58 +++--- .../removerelatedsubjects/tests/main.nf.test | 53 +++-- .../tests/main.nf.test.snap | 40 ++-- 24 files changed, 1009 insertions(+), 409 deletions(-) diff --git a/modules/nf-core/gcta/addgrms/tests/main.nf.test b/modules/nf-core/gcta/addgrms/tests/main.nf.test index b74521b5fea6..db96ae31cee2 100644 --- a/modules/nf-core/gcta/addgrms/tests/main.nf.test +++ b/modules/nf-core/gcta/addgrms/tests/main.nf.test @@ -8,25 +8,98 @@ nextflow_process { tag "modules_nfcore" tag "gcta" tag "gcta/addgrms" + tag "gcta/makegrmpart" + tag "gawk" - test("homo_sapiens popgen - merge dense GRMs from mgrm") { - config "./nextflow.config" + setup { + run("GAWK", alias: 
"GAWK_COMPLEMENT_VARIANTS") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'plink_simulated_complement' ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_random_selected_snp.txt', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ] + ] + input[1] = Channel.of('FNR == NR { keep[\$1] = 1; next } !(\$2 in keep) { print \$2 }').collectFile(name:'complement_variants.awk') + input[2] = false + """ + } + } - when { + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_LDMS1") { + script "../../makegrmpart/main.nf" + process { + """ + file('plink_simulated.mbfile').text = 'plink_simulated\\n' + + input[0] = [ + [ id:'plink_simulated_ldms1', part_gcta_job:1, nparts_gcta:1 ], + file('plink_simulated.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + ] + input[1] = [ + [ id:'plink_random_selected_snp' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_random_selected_snp.txt', checkIfExists: true) + ] + """ + } + } + + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_LDMS2") { + script "../../makegrmpart/main.nf" process { """ + file('plink_simulated.mbfile').text = 'plink_simulated\\n' + input[0] = [ - [ id:"plink_simulated_ldms" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms.mgrm", checkIfExists: true), + [ id:'plink_simulated_ldms2', part_gcta_job:1, nparts_gcta:1 ], + file('plink_simulated.mbfile'), [ - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms1.grm.id", 
checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms1.grm.bin", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms1.grm.N.bin", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms2.grm.id", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms2.grm.bin", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms2.grm.N.bin", checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) ] ] + input[1] = GAWK_COMPLEMENT_VARIANTS.out.output + """ + } + } + } + + test("homo_sapiens popgen - merge dense GRMs from mgrm") { + config "./nextflow.config" + + when { + process { + """ + mgrm_file = Channel + .of('plink_simulated_ldms1.part_1_1\\nplink_simulated_ldms2.part_1_1') + .collectFile(name:'plink_simulated_ldms.mgrm', newLine: true) + + grm_files = GCTA_MAKEGRMPART_LDMS1.out.grm_files + .mix(GCTA_MAKEGRMPART_LDMS2.out.grm_files) + .map { meta, grm_id, grm_bin, grm_n_bin -> [grm_id, grm_bin, grm_n_bin] } + .collect() + .map { rows -> rows.flatten() } + + input[0] = mgrm_file + .combine(grm_files) + .map { row -> [[ id:'plink_simulated_ldms' ], row[0], row[1..-1]] } """ } } @@ -53,18 +126,19 @@ nextflow_process { when { process { """ - input[0] = [ - [ id:"plink_simulated_ldms" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms.mgrm", checkIfExists: true), - [ - file(params.modules_testdata_base_path 
+ "genomics/homo_sapiens/popgen/plink_simulated_ldms1.grm.id", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms1.grm.bin", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms1.grm.N.bin", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms2.grm.id", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms2.grm.bin", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms2.grm.N.bin", checkIfExists: true) - ] - ] + mgrm_file = Channel + .of('plink_simulated_ldms1.part_1_1\\nplink_simulated_ldms2.part_1_1') + .collectFile(name:'plink_simulated_ldms.mgrm', newLine: true) + + grm_files = GCTA_MAKEGRMPART_LDMS1.out.grm_files + .mix(GCTA_MAKEGRMPART_LDMS2.out.grm_files) + .map { meta, grm_id, grm_bin, grm_n_bin -> [grm_id, grm_bin, grm_n_bin] } + .collect() + .map { rows -> rows.flatten() } + + input[0] = mgrm_file + .combine(grm_files) + .map { row -> [[ id:'plink_simulated_ldms' ], row[0], row[1..-1]] } """ } } diff --git a/modules/nf-core/gcta/addgrms/tests/main.nf.test.snap b/modules/nf-core/gcta/addgrms/tests/main.nf.test.snap index ef4571ed193a..c25e7e06baec 100644 --- a/modules/nf-core/gcta/addgrms/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/addgrms/tests/main.nf.test.snap @@ -52,7 +52,7 @@ "id": "plink_simulated_ldms" }, "plink_simulated_ldms.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9", - "plink_simulated_ldms.grm.bin:md5,59a9d628e3fb4b9488244048c952b2ca", + "plink_simulated_ldms.grm.bin:md5,e6a56e44acd03f87043435c382fe0149", "plink_simulated_ldms.grm.N.bin:md5,acaa43bbbf2253d392537a178ecf09a4" ] ], @@ -70,6 +70,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-13T14:19:21.015064654" + "timestamp": 
"2026-03-13T15:38:05.74494821" } } \ No newline at end of file diff --git a/modules/nf-core/gcta/adjustgrm/tests/main.nf.test b/modules/nf-core/gcta/adjustgrm/tests/main.nf.test index 64b805325d05..76be2d28d935 100644 --- a/modules/nf-core/gcta/adjustgrm/tests/main.nf.test +++ b/modules/nf-core/gcta/adjustgrm/tests/main.nf.test @@ -8,6 +8,33 @@ nextflow_process { tag "modules_nfcore" tag "gcta" tag "gcta/adjustgrm" + tag "gcta/makegrmpart" + + setup { + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_DENSE") { + script "../../makegrmpart/main.nf" + process { + """ + file('plink_simulated.mbfile').text = 'plink_simulated\\n' + + input[0] = [ + [ id:'plink_simulated_dense', part_gcta_job:1, nparts_gcta:1 ], + file('plink_simulated.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + ] + input[1] = [[ id:'all_variants' ], []] + """ + } + } + } test("homo_sapiens popgen - adjust dense GRM") { config "./nextflow.config" @@ -15,12 +42,12 @@ nextflow_process { when { process { """ - input[0] = [ - [ id:"plink_simulated" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.id", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.bin", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.N.bin", checkIfExists: true) - ] + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = dense_grm """ } } @@ -29,7 
+56,7 @@ nextflow_process { assertAll( { assert process.success }, { assert process.out.grm_files.size() == 1 }, - { assert process.out.grm_files.get(0).get(0).id == "plink_simulated" }, + { assert process.out.grm_files.get(0).get(0).id == "plink_simulated_dense.part_1_1" }, { assert snapshot( process.out.grm_files, @@ -47,12 +74,12 @@ nextflow_process { when { process { """ - input[0] = [ - [ id:"plink_simulated" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.id", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.bin", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.N.bin", checkIfExists: true) - ] + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = dense_grm """ } } diff --git a/modules/nf-core/gcta/adjustgrm/tests/main.nf.test.snap b/modules/nf-core/gcta/adjustgrm/tests/main.nf.test.snap index a2d574cecdc2..6ed461bbd2c6 100644 --- a/modules/nf-core/gcta/adjustgrm/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/adjustgrm/tests/main.nf.test.snap @@ -5,11 +5,11 @@ "0": [ [ { - "id": "plink_simulated" + "id": "plink_simulated_dense.part_1_1" }, - "plink_simulated_adj.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", - "plink_simulated_adj.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", - "plink_simulated_adj.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + "plink_simulated_dense.part_1_1_adj.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_dense.part_1_1_adj.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_dense.part_1_1_adj.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "1": [ @@ -22,11 +22,11 @@ "grm_files": [ [ { - "id": "plink_simulated" + "id": 
"plink_simulated_dense.part_1_1" }, - "plink_simulated_adj.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", - "plink_simulated_adj.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", - "plink_simulated_adj.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + "plink_simulated_dense.part_1_1_adj.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_dense.part_1_1_adj.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_dense.part_1_1_adj.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "versions_gcta": [ @@ -42,18 +42,18 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-13T13:52:09.111456691" + "timestamp": "2026-03-13T15:35:38.715590031" }, "homo_sapiens popgen - adjust dense GRM": { "content": [ [ [ { - "id": "plink_simulated" + "id": "plink_simulated_dense.part_1_1" }, - "plink_simulated_adj.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9", - "plink_simulated_adj.grm.bin:md5,6d16a365bd94b621963769e8314eeaa0", - "plink_simulated_adj.grm.N.bin:md5,acaa43bbbf2253d392537a178ecf09a4" + "plink_simulated_dense.part_1_1_adj.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9", + "plink_simulated_dense.part_1_1_adj.grm.bin:md5,6d16a365bd94b621963769e8314eeaa0", + "plink_simulated_dense.part_1_1_adj.grm.N.bin:md5,acaa43bbbf2253d392537a178ecf09a4" ] ], { @@ -70,6 +70,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-13T14:19:40.424594248" + "timestamp": "2026-03-13T15:35:32.503491346" } } \ No newline at end of file diff --git a/modules/nf-core/gcta/bivariatereml/tests/main.nf.test b/modules/nf-core/gcta/bivariatereml/tests/main.nf.test index e1a7dba423c6..442abd673f1f 100644 --- a/modules/nf-core/gcta/bivariatereml/tests/main.nf.test +++ b/modules/nf-core/gcta/bivariatereml/tests/main.nf.test @@ -7,30 +7,94 @@ nextflow_process { tag "modules_nfcore" tag "gcta" tag "gcta/bivariatereml" + tag "gcta/makegrmpart" + tag "gawk" - test("homo_sapiens popgen - bivariate phenotype with dense GRM and covariates") { - config 
"./nextflow.config" - when { + setup { + run("GAWK", alias: "GAWK_BIVARIATE_PHENO") { + script "../../../gawk/main.nf" process { """ input[0] = [ - [ id:"Trait1__Trait2" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm_bivariate.noheader.txt", checkIfExists: true) + [ id:'Trait1__Trait2' ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] ] - input[1] = [ - [ id:"gcta_grm_0" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm_0.grm.id", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm_0.grm.bin", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm_0.grm.N.bin", checkIfExists: true) + input[1] = Channel.of('FNR == NR { if (FNR == 1) next; trait1[\$2] = \$3; next } FNR == 1 { next } { print \$1, \$2, trait1[\$2], \$4 }').collectFile(name:'bivariate_phenotype.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_QUANTITATIVE_COVARIATES") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'covariates_quant' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) ] - input[2] = [ - [ id:"covariates_quant" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm_covariates.quant.noheader.txt", checkIfExists: true) + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$5, \$6 }').collectFile(name:'quantitative_covariates.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_CATEGORICAL_COVARIATES") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'covariates_cat' ], + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) ] - input[3] = [ - [ id:"covariates_cat" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm_covariates.cat.noheader.txt", checkIfExists: true) + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$3 }').collectFile(name:'categorical_covariates.awk') + input[2] = false + """ + } + } + + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_DENSE") { + script "../../makegrmpart/main.nf" + process { + """ + file('plink_simulated.mbfile').text = 'plink_simulated\\n' + + input[0] = [ + [ id:'plink_simulated_dense', part_gcta_job:1, nparts_gcta:1 ], + file('plink_simulated.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] ] + input[1] = [[ id:'all_variants' ], []] + """ + } + } + } + + test("homo_sapiens popgen - bivariate phenotype with dense GRM and covariates") { + config "./nextflow.config" + when { + process { + """ + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = GAWK_BIVARIATE_PHENO.out.output + input[1] = dense_grm + input[2] = GAWK_QUANTITATIVE_COVARIATES.out.output + input[3] = GAWK_CATEGORICAL_COVARIATES.out.output """ } } @@ -60,18 +124,15 @@ nextflow_process { when { process { """ - input[0] = [ - [ id:"Trait1__Trait2" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm_bivariate.noheader.txt", checkIfExists: true) - ] - input[1] = [ - [ id:"gcta_grm_0" 
], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm_0.grm.id", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm_0.grm.bin", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm_0.grm.N.bin", checkIfExists: true) - ] - input[2] = [[ id:"covariates_quant" ], []] - input[3] = [[ id:"covariates_cat" ], []] + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = GAWK_BIVARIATE_PHENO.out.output + input[1] = dense_grm + input[2] = [[ id:'covariates_quant' ], []] + input[3] = [[ id:'covariates_cat' ], []] """ } } diff --git a/modules/nf-core/gcta/bivariatereml/tests/main.nf.test.snap b/modules/nf-core/gcta/bivariatereml/tests/main.nf.test.snap index 8eb6d074f34f..aeab61112211 100644 --- a/modules/nf-core/gcta/bivariatereml/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/bivariatereml/tests/main.nf.test.snap @@ -6,7 +6,7 @@ { "id": "Trait1__Trait2" }, - "Trait1__Trait2.hsq:md5,8cca31b34b14613e781bc840040380ba" + "Trait1__Trait2.hsq:md5,4fe310d5073a497f459e33ee7aa357a4" ] ], { @@ -23,7 +23,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-13T14:43:35.812149857" + "timestamp": "2026-03-13T15:37:23.03169267" }, "homo_sapiens popgen - bivariate phenotype with dense GRM - stub": { "content": [ diff --git a/modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test b/modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test index b0b456ba935a..bdba09994011 100644 --- a/modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test +++ b/modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test @@ -7,37 +7,149 @@ nextflow_process { tag "modules_nfcore" tag "gcta" tag "gcta/bivariateremlldms" + tag "gcta/makegrmpart" + tag "gawk" - 
test("homo_sapiens popgen - bivariate phenotype with ldms mgrm and covariates") { - config "./nextflow.config" - when { + setup { + run("GAWK", alias: "GAWK_BIVARIATE_PHENO") { + script "../../../gawk/main.nf" process { """ input[0] = [ - [ id:"Trait1__Trait2" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm_bivariate.noheader.txt", checkIfExists: true) + [ id:'Trait1__Trait2' ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] ] - input[1] = [ - [ id:"gcta_bivariate_ldms" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_bivariate_ldms.mgrm", checkIfExists: true), + input[1] = Channel.of('FNR == NR { if (FNR == 1) next; trait1[\$2] = \$3; next } FNR == 1 { next } { print \$1, \$2, trait1[\$2], \$4 }').collectFile(name:'bivariate_phenotype.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_QUANTITATIVE_COVARIATES") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'covariates_quant' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$5, \$6 }').collectFile(name:'quantitative_covariates.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_CATEGORICAL_COVARIATES") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'covariates_cat' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$3 }').collectFile(name:'categorical_covariates.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_COMPLEMENT_VARIANTS") 
{ + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'plink_simulated_complement' ], [ - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm.grm.id", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm.grm.bin", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm.grm.N.bin", checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_random_selected_snp.txt', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) ] ] - input[2] = [ - [ id:"covariates_quant" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm_covariates.quant.noheader.txt", checkIfExists: true) + input[1] = Channel.of('FNR == NR { keep[\$1] = 1; next } !(\$2 in keep) { print \$2 }').collectFile(name:'complement_variants.awk') + input[2] = false + """ + } + } + + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_LDMS1") { + script "../../makegrmpart/main.nf" + process { + """ + file('plink_simulated.mbfile').text = 'plink_simulated\\n' + + input[0] = [ + [ id:'plink_simulated_ldms1', part_gcta_job:1, nparts_gcta:1 ], + file('plink_simulated.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] ] - input[3] = [ - [ id:"covariates_cat" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm_covariates.cat.noheader.txt", checkIfExists: true) + input[1] = [ + [ id:'plink_random_selected_snp' ], + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/popgen/plink_random_selected_snp.txt', checkIfExists: true) ] """ } } + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_LDMS2") { + script "../../makegrmpart/main.nf" + process { + """ + file('plink_simulated.mbfile').text = 'plink_simulated\\n' + + input[0] = [ + [ id:'plink_simulated_ldms2', part_gcta_job:1, nparts_gcta:1 ], + file('plink_simulated.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + ] + input[1] = GAWK_COMPLEMENT_VARIANTS.out.output + """ + } + } + } + + test("homo_sapiens popgen - bivariate phenotype with ldms mgrm and covariates") { + config "./nextflow.config" + when { + process { + """ + mgrm_file = Channel + .of('plink_simulated_ldms1.part_1_1\\nplink_simulated_ldms2.part_1_1') + .collectFile(name:'plink_simulated_ldms.mgrm', newLine: true) + + ldms_grm_files = GCTA_MAKEGRMPART_LDMS1.out.grm_files + .mix(GCTA_MAKEGRMPART_LDMS2.out.grm_files) + .map { meta, grm_id, grm_bin, grm_n_bin -> [grm_id, grm_bin, grm_n_bin] } + .collect() + .map { rows -> rows.flatten() } + + input[0] = GAWK_BIVARIATE_PHENO.out.output + input[1] = mgrm_file + .combine(ldms_grm_files) + .map { row -> [[ id:'plink_simulated_ldms' ], row[0], row[1..-1]] } + input[2] = GAWK_QUANTITATIVE_COVARIATES.out.output + input[3] = GAWK_CATEGORICAL_COVARIATES.out.output + """ + } + } + then { assertAll( { assert process.success }, @@ -63,21 +175,22 @@ nextflow_process { when { process { """ - input[0] = [ - [ id:"Trait1__Trait2" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm_bivariate.noheader.txt", checkIfExists: true) - ] - input[1] = [ - [ id:"gcta_bivariate_ldms" ], - 
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_bivariate_ldms.mgrm", checkIfExists: true), - [ - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm.grm.id", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm.grm.bin", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/gcta_grm.grm.N.bin", checkIfExists: true) - ] - ] - input[2] = [[ id:"covariates_quant" ], []] - input[3] = [[ id:"covariates_cat" ], []] + mgrm_file = Channel + .of('plink_simulated_ldms1.part_1_1\\nplink_simulated_ldms2.part_1_1') + .collectFile(name:'plink_simulated_ldms.mgrm', newLine: true) + + ldms_grm_files = GCTA_MAKEGRMPART_LDMS1.out.grm_files + .mix(GCTA_MAKEGRMPART_LDMS2.out.grm_files) + .map { meta, grm_id, grm_bin, grm_n_bin -> [grm_id, grm_bin, grm_n_bin] } + .collect() + .map { rows -> rows.flatten() } + + input[0] = GAWK_BIVARIATE_PHENO.out.output + input[1] = mgrm_file + .combine(ldms_grm_files) + .map { row -> [[ id:'plink_simulated_ldms' ], row[0], row[1..-1]] } + input[2] = [[ id:'covariates_quant' ], []] + input[3] = [[ id:'covariates_cat' ], []] """ } } diff --git a/modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test.snap b/modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test.snap index 057cb1baf43a..60ce78e5668d 100644 --- a/modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test.snap @@ -6,7 +6,7 @@ { "id": "Trait1__Trait2" }, - "Trait1__Trait2.hsq:md5,5cbdc47d28a46d61567a91003f32c579" + "Trait1__Trait2.hsq:md5,fca157825307e58cb3e0423cc80bd97a" ] ], { @@ -23,7 +23,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-13T14:43:49.100858578" + "timestamp": "2026-03-13T15:38:31.82813107" }, "homo_sapiens popgen - bivariate phenotype with ldms mgrm - stub": { "content": [ diff --git 
a/modules/nf-core/gcta/calculateldscores/main.nf b/modules/nf-core/gcta/calculateldscores/main.nf index f26837acda89..48770b12941d 100644 --- a/modules/nf-core/gcta/calculateldscores/main.nf +++ b/modules/nf-core/gcta/calculateldscores/main.nf @@ -19,13 +19,14 @@ process GCTA_CALCULATELDSCORES { script: def extra_args = task.ext.args ?: '' + def ld_score_region = task.ext.ld_score_region ?: 200 """ set -euo pipefail gcta \\ --bfile ${meta.id} \\ - --ld-score-region 200 \\ + --ld-score-region ${ld_score_region} \\ --out ${meta.id}_gcta_ld \\ --thread-num ${task.cpus} ${extra_args} @@ -43,6 +44,11 @@ process GCTA_CALCULATELDSCORES { q2=\$(awk -v idx="\${q2_idx}" 'NR == idx { print \$2 }' "\${sorted_file}") q3=\$(awk -v idx="\${q3_idx}" 'NR == idx { print \$2 }' "\${sorted_file}") + : > "${meta.id}_snp_group1.txt" + : > "${meta.id}_snp_group2.txt" + : > "${meta.id}_snp_group3.txt" + : > "${meta.id}_snp_group4.txt" + awk -v q1="\${q1}" -v q2="\${q2}" -v q3="\${q3}" -v prefix="${meta.id}" ' NR > 1 { if (\$8 <= q1) { @@ -60,10 +66,10 @@ process GCTA_CALCULATELDSCORES { stub: """ - touch ${meta.id}_gcta_ld.score.ld - touch ${meta.id}_snp_group1.txt - touch ${meta.id}_snp_group2.txt - touch ${meta.id}_snp_group3.txt - touch ${meta.id}_snp_group4.txt + printf "SNP\tA1\tA2\tFreq\tb\tse\tp\tldscore\n" > ${meta.id}_gcta_ld.score.ld + printf "stub_snp1\n" > ${meta.id}_snp_group1.txt + printf "stub_snp2\n" > ${meta.id}_snp_group2.txt + printf "stub_snp3\n" > ${meta.id}_snp_group3.txt + printf "stub_snp4\n" > ${meta.id}_snp_group4.txt """ } diff --git a/modules/nf-core/gcta/calculateldscores/tests/main.nf.test b/modules/nf-core/gcta/calculateldscores/tests/main.nf.test index 41704f0b1a54..fc0c18b08058 100644 --- a/modules/nf-core/gcta/calculateldscores/tests/main.nf.test +++ b/modules/nf-core/gcta/calculateldscores/tests/main.nf.test @@ -8,18 +8,19 @@ nextflow_process { tag "modules_nfcore" tag "gcta" tag "gcta/calculateldscores" + tag "plink" - test("homo_sapiens popgen - chr01 
plink1") { + test("homo_sapiens gsmr - plink1") { config "./nextflow.config" when { process { """ input[0] = [ - [ id:'chr01.vcf' ], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/chr01.vcf.bed', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/chr01.vcf.bim', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/chr01.vcf.fam', checkIfExists: true) + [ id:'bfile' ], + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.bed', checkIfExists: true), + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.bim', checkIfExists: true), + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.fam', checkIfExists: true) ] """ } @@ -30,16 +31,15 @@ nextflow_process { { assert process.success }, { assert process.out.ld_scores.size() == 1 }, { assert process.out.snp_group_files.size() == 1 }, - { assert process.out.ld_scores.get(0).get(0).id == 'chr01.vcf' }, + { assert process.out.ld_scores.get(0).get(0).id == 'bfile' }, { def snpGroups = process.out.snp_group_files.get(0) - assert snpGroups.get(0).id == 'chr01.vcf' + assert snpGroups.get(0).id == 'bfile' assert (1..4).every { idx -> path(snpGroups.get(idx)).exists() } }, { assert snapshot( process.out.ld_scores, - process.out.snp_group_files, process.out.findAll { key, val -> key.startsWith('versions') } ).match() } @@ -47,7 +47,7 @@ nextflow_process { } } - test("homo_sapiens popgen - chr01 plink1 - stub") { + test("homo_sapiens gsmr - plink1 - stub") { options "-stub" config "./nextflow.config" @@ -55,10 +55,10 @@ nextflow_process { process { """ input[0] = [ - [ id:'chr01.vcf' ], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/chr01.vcf.bed', checkIfExists: true), - file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/popgen/chr01.vcf.bim', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/chr01.vcf.fam', checkIfExists: true) + [ id:'bfile' ], + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.bed', checkIfExists: true), + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.bim', checkIfExists: true), + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.fam', checkIfExists: true) ] """ } diff --git a/modules/nf-core/gcta/calculateldscores/tests/main.nf.test.snap b/modules/nf-core/gcta/calculateldscores/tests/main.nf.test.snap index 346a46a6ed9b..5ce5aafa7a90 100644 --- a/modules/nf-core/gcta/calculateldscores/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/calculateldscores/tests/main.nf.test.snap @@ -1,24 +1,50 @@ { - "homo_sapiens popgen - chr01 plink1 - stub": { + "homo_sapiens gsmr - plink1": { + "content": [ + [ + [ + { + "id": "bfile" + }, + "bfile_gcta_ld.score.ld:md5,ccfd5ff1898853f1c02e7572aaa335cf" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_CALCULATELDSCORES", + "gcta", + "*******************************************************************" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T16:00:56.198048546" + }, + "homo_sapiens gsmr - plink1 - stub": { "content": [ { "0": [ [ { - "id": "chr01.vcf" + "id": "bfile" }, - "chr01.vcf_gcta_ld.score.ld:md5,d41d8cd98f00b204e9800998ecf8427e" + "bfile_gcta_ld.score.ld:md5,1750d635cb5186c5b09e5e34515cd19d" ] ], "1": [ [ { - "id": "chr01.vcf" + "id": "bfile" }, - "chr01.vcf_snp_group1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "chr01.vcf_snp_group2.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "chr01.vcf_snp_group3.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - 
"chr01.vcf_snp_group4.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + "bfile_snp_group1.txt:md5,a48b3426cc201c1c0be8e11cf34e9365", + "bfile_snp_group2.txt:md5,6d0959c8d45c313c024c6d3fa5c8c630", + "bfile_snp_group3.txt:md5,931517470f35d95b31209c555bd159cf", + "bfile_snp_group4.txt:md5,cce7bf7b3cf0a016872193a1283153ab" ] ], "2": [ @@ -31,20 +57,20 @@ "ld_scores": [ [ { - "id": "chr01.vcf" + "id": "bfile" }, - "chr01.vcf_gcta_ld.score.ld:md5,d41d8cd98f00b204e9800998ecf8427e" + "bfile_gcta_ld.score.ld:md5,1750d635cb5186c5b09e5e34515cd19d" ] ], "snp_group_files": [ [ { - "id": "chr01.vcf" + "id": "bfile" }, - "chr01.vcf_snp_group1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "chr01.vcf_snp_group2.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "chr01.vcf_snp_group3.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "chr01.vcf_snp_group4.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + "bfile_snp_group1.txt:md5,a48b3426cc201c1c0be8e11cf34e9365", + "bfile_snp_group2.txt:md5,6d0959c8d45c313c024c6d3fa5c8c630", + "bfile_snp_group3.txt:md5,931517470f35d95b31209c555bd159cf", + "bfile_snp_group4.txt:md5,cce7bf7b3cf0a016872193a1283153ab" ] ], "versions_gcta": [ @@ -60,43 +86,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-13T14:16:34.838851258" - }, - "homo_sapiens popgen - chr01 plink1": { - "content": [ - [ - [ - { - "id": "chr01.vcf" - }, - "chr01.vcf_gcta_ld.score.ld:md5,374d4f55f66ff41d4d941ad181114205" - ] - ], - [ - [ - { - "id": "chr01.vcf" - }, - "chr01.vcf_snp_group1.txt:md5,e249439ac7f63ebaccf295b61e08cf7a", - "chr01.vcf_snp_group2.txt:md5,7061c4628a05c528355e9cb068b5cefb", - "chr01.vcf_snp_group3.txt:md5,60195886c40ab436fe623f4c7aa0e323", - "chr01.vcf_snp_group4.txt:md5,0f1cc15d0b496448ed0d55233a443181" - ] - ], - { - "versions_gcta": [ - [ - "GCTA_CALCULATELDSCORES", - "gcta", - "*******************************************************************" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.4" - }, - "timestamp": 
"2026-03-13T14:16:26.216820259" + "timestamp": "2026-03-13T16:01:03.508961363" } } \ No newline at end of file diff --git a/modules/nf-core/gcta/calculateldscores/tests/nextflow.config b/modules/nf-core/gcta/calculateldscores/tests/nextflow.config index 76b9ab148074..c34dbbbbd82d 100644 --- a/modules/nf-core/gcta/calculateldscores/tests/nextflow.config +++ b/modules/nf-core/gcta/calculateldscores/tests/nextflow.config @@ -1,3 +1,10 @@ params { modules_testdata_base_path = System.getenv('NF_MODULES_TESTDATA_BASE_PATH') ?: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' } + +process { + withName: "GCTA_CALCULATELDSCORES" { + cpus = 1 + ext.ld_score_region = 50 + } +} diff --git a/modules/nf-core/gcta/fastgwa/tests/main.nf.test b/modules/nf-core/gcta/fastgwa/tests/main.nf.test index 016019df9630..5c2a36d578cd 100644 --- a/modules/nf-core/gcta/fastgwa/tests/main.nf.test +++ b/modules/nf-core/gcta/fastgwa/tests/main.nf.test @@ -8,35 +8,112 @@ nextflow_process { tag "modules_nfcore" tag "gcta" tag "gcta/fastgwa" + tag "gcta/makegrmpart" + tag "gcta/makebksparse" + tag "gawk" + + setup { + run("GAWK", alias: "GAWK_QUANTITATIVE_PHENO") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'QuantitativeTrait' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$3 }').collectFile(name:'quantitative_phenotype.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_QUANTITATIVE_COVARIATES") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'covariates_quant' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$4, \$5, \$6 }').collectFile(name:'quantitative_covariates.awk') + input[2] = false + """ + } 
+ } + + run("GAWK", alias: "GAWK_CATEGORICAL_COVARIATES") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'covariates_cat' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$3 }').collectFile(name:'categorical_covariates.awk') + input[2] = false + """ + } + } + + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_DENSE") { + script "../../makegrmpart/main.nf" + process { + """ + file('plink_simulated.mbfile').text = 'plink_simulated\\n' + + input[0] = [ + [ id:'plink_simulated_dense', part_gcta_job:1, nparts_gcta:1 ], + file('plink_simulated.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + ] + input[1] = [[ id:'all_variants' ], []] + """ + } + } + + run("GCTA_MAKEBKSPARSE", alias: "GCTA_MAKEBKSPARSE_DENSE") { + script "../../makebksparse/main.nf" + process { + """ + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = dense_grm + input[1] = Channel.value(0.05) + """ + } + } + } test("homo_sapiens popgen - plink1 with sparse GRM and covariates") { config "./nextflow.config" when { process { """ + sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_id, sparse_grm_sp -> + [[ id:meta.id + '_sp' ], sparse_grm_id, sparse_grm_sp] + } + input[0] = [ [ id:"plink_simulated" ], file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bed", 
checkIfExists: true), file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bim", checkIfExists: true), file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.fam", checkIfExists: true) ] - input[1] = [ - [ id:"plink_simulated_sp" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_sp.grm.id", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_sp.grm.sp", checkIfExists: true) - ] - input[2] = [ - [ id:"QuantitativeTrait" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_quantitative.noheader.txt", checkIfExists: true) - ] - input[3] = [ - [ id:"covariates_quant" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_covariates.quant.noheader.txt", checkIfExists: true) - ] - input[4] = [ - [ id:"covariates_cat" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_covariates.cat.noheader.txt", checkIfExists: true) - ] + input[1] = sparse_grm + input[2] = GAWK_QUANTITATIVE_PHENO.out.output + input[3] = GAWK_QUANTITATIVE_COVARIATES.out.output + input[4] = GAWK_CATEGORICAL_COVARIATES.out.output """ } } @@ -65,23 +142,20 @@ nextflow_process { when { process { """ + sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_id, sparse_grm_sp -> + [[ id:meta.id + '_sp' ], sparse_grm_id, sparse_grm_sp] + } + input[0] = [ [ id:"plink_simulated" ], file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.pgen", checkIfExists: true), file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.pvar", checkIfExists: true), file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.psam", checkIfExists: true) ] - input[1] = [ - [ id:"plink_simulated_sp" ], - file(params.modules_testdata_base_path 
+ "genomics/homo_sapiens/popgen/plink_simulated_sp.grm.id", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_sp.grm.sp", checkIfExists: true) - ] - input[2] = [ - [ id:"QuantitativeTrait" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_quantitative.noheader.txt", checkIfExists: true) - ] - input[3] = [[ id:"covariates_quant" ], []] - input[4] = [[ id:"covariates_cat" ], []] + input[1] = sparse_grm + input[2] = GAWK_QUANTITATIVE_PHENO.out.output + input[3] = [[ id:'covariates_quant' ], []] + input[4] = [[ id:'covariates_cat' ], []] """ } } diff --git a/modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap b/modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap index 48c9ca0e7b23..9c01c905e7fb 100644 --- a/modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap @@ -56,7 +56,7 @@ { "id": "QuantitativeTrait" }, - "plink_simulated_QuantitativeTrait.fastGWA:md5,09d7c5ed57c214e91bbb21fd49f0eadc" + "plink_simulated_QuantitativeTrait.fastGWA:md5,ba64c9460f412ffa7afb4060eaa029e4" ] ], { @@ -73,6 +73,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-13T14:20:29.849273904" + "timestamp": "2026-03-13T15:35:49.465395409" } } \ No newline at end of file diff --git a/modules/nf-core/gcta/filtergrmwithkeep/tests/main.nf.test b/modules/nf-core/gcta/filtergrmwithkeep/tests/main.nf.test index 7e8be4435e75..53de4d1bab0c 100644 --- a/modules/nf-core/gcta/filtergrmwithkeep/tests/main.nf.test +++ b/modules/nf-core/gcta/filtergrmwithkeep/tests/main.nf.test @@ -8,19 +8,44 @@ nextflow_process { tag "modules_nfcore" tag "gcta" tag "gcta/filtergrmwithkeep" + tag "gcta/makegrmpart" tag "gcta/removerelatedsubjects" setup { - run("GCTA_REMOVERELATEDSUBJECTS", alias: "GCTA_REMOVERELATEDSUBJECTS_KEEP") { - script "../../removerelatedsubjects/main.nf" + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_DENSE") { + 
script "../../makegrmpart/main.nf" process { """ + file('plink_simulated.mbfile').text = 'plink_simulated\\n' + input[0] = [ - [ id:"plink_simulated" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.id", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.bin", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.N.bin", checkIfExists: true) + [ id:'plink_simulated_dense', part_gcta_job:1, nparts_gcta:1 ], + file('plink_simulated.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] ] + input[1] = [[ id:'all_variants' ], []] + """ + } + } + + run("GCTA_REMOVERELATEDSUBJECTS", alias: "GCTA_REMOVERELATEDSUBJECTS_KEEP") { + script "../../removerelatedsubjects/main.nf" + process { + """ + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = dense_grm """ } } @@ -32,12 +57,12 @@ nextflow_process { when { process { """ - input[0] = [ - [ id:"plink_simulated" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.id", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.bin", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.N.bin", checkIfExists: true) - ] + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, 
grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = dense_grm input[1] = GCTA_REMOVERELATEDSUBJECTS_KEEP.out.keep_file """ } @@ -47,7 +72,7 @@ nextflow_process { assertAll( { assert process.success }, { assert process.out.filtered_grm.size() == 1 }, - { assert process.out.filtered_grm.get(0).get(0).id == "plink_simulated" }, + { assert process.out.filtered_grm.get(0).get(0).id == "plink_simulated_dense.part_1_1" }, { assert snapshot( process.out.filtered_grm, @@ -65,12 +90,12 @@ nextflow_process { when { process { """ - input[0] = [ - [ id:"plink_simulated" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.id", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.bin", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.N.bin", checkIfExists: true) - ] + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = dense_grm input[1] = GCTA_REMOVERELATEDSUBJECTS_KEEP.out.keep_file """ } diff --git a/modules/nf-core/gcta/filtergrmwithkeep/tests/main.nf.test.snap b/modules/nf-core/gcta/filtergrmwithkeep/tests/main.nf.test.snap index 9d98baf17027..85fbfd385934 100644 --- a/modules/nf-core/gcta/filtergrmwithkeep/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/filtergrmwithkeep/tests/main.nf.test.snap @@ -4,11 +4,11 @@ [ [ { - "id": "plink_simulated" + "id": "plink_simulated_dense.part_1_1" }, - "plink_simulated_unrel.grm.id:md5,ca8c0bded6951fdd3bf0dddc97b6df6b", - "plink_simulated_unrel.grm.bin:md5,b1f124463eecbae86840a6651eec372d", - "plink_simulated_unrel.grm.N.bin:md5,06b73ea8bae8f1e5f5d4de33dbd2c75e" + 
"plink_simulated_dense.part_1_1_unrel.grm.id:md5,ca8c0bded6951fdd3bf0dddc97b6df6b", + "plink_simulated_dense.part_1_1_unrel.grm.bin:md5,b1f124463eecbae86840a6651eec372d", + "plink_simulated_dense.part_1_1_unrel.grm.N.bin:md5,06b73ea8bae8f1e5f5d4de33dbd2c75e" ] ], { @@ -25,7 +25,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-13T14:20:44.100270115" + "timestamp": "2026-03-13T15:36:03.137610597" }, "homo_sapiens popgen - filter dense GRM with keep file - stub": { "content": [ @@ -33,11 +33,11 @@ "0": [ [ { - "id": "plink_simulated" + "id": "plink_simulated_dense.part_1_1" }, - "plink_simulated_unrel.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", - "plink_simulated_unrel.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", - "plink_simulated_unrel.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + "plink_simulated_dense.part_1_1_unrel.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_dense.part_1_1_unrel.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_dense.part_1_1_unrel.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "1": [ @@ -50,11 +50,11 @@ "filtered_grm": [ [ { - "id": "plink_simulated" + "id": "plink_simulated_dense.part_1_1" }, - "plink_simulated_unrel.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", - "plink_simulated_unrel.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", - "plink_simulated_unrel.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + "plink_simulated_dense.part_1_1_unrel.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_dense.part_1_1_unrel.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_dense.part_1_1_unrel.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "versions_gcta": [ @@ -70,6 +70,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-13T14:14:02.867340463" + "timestamp": "2026-03-13T15:36:09.583271039" } } \ No newline at end of file diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test 
b/modules/nf-core/gcta/makebksparse/tests/main.nf.test index 129dce09da5a..37a3a78367ff 100644 --- a/modules/nf-core/gcta/makebksparse/tests/main.nf.test +++ b/modules/nf-core/gcta/makebksparse/tests/main.nf.test @@ -8,6 +8,33 @@ nextflow_process { tag "modules_nfcore" tag "gcta" tag "gcta/makebksparse" + tag "gcta/makegrmpart" + + setup { + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_DENSE") { + script "../../makegrmpart/main.nf" + process { + """ + file('plink_simulated.mbfile').text = 'plink_simulated\\n' + + input[0] = [ + [ id:'plink_simulated_dense', part_gcta_job:1, nparts_gcta:1 ], + file('plink_simulated.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + ] + input[1] = [[ id:'all_variants' ], []] + """ + } + } + } test("homo_sapiens popgen - create sparse GRM") { config "./nextflow.config" @@ -15,13 +42,13 @@ nextflow_process { when { process { """ - input[0] = [ - [ id:"plink_simulated" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.id", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.bin", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.N.bin", checkIfExists: true) - ] - input[1] = 0.05 + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = dense_grm + input[1] = Channel.value(0.05) """ } } @@ -30,7 +57,7 @@ nextflow_process { assertAll( { assert process.success 
}, { assert process.out.sparse_grm_files.size() == 1 }, - { assert process.out.sparse_grm_files.get(0).get(0).id == "plink_simulated" }, + { assert process.out.sparse_grm_files.get(0).get(0).id == "plink_simulated_dense.part_1_1" }, { assert snapshot( process.out.sparse_grm_files, @@ -48,13 +75,13 @@ nextflow_process { when { process { """ - input[0] = [ - [ id:"plink_simulated" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.id", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.bin", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.N.bin", checkIfExists: true) - ] - input[1] = 0.05 + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = dense_grm + input[1] = Channel.value(0.05) """ } } diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap index 6db0c0f9ab7e..8ad77892ba34 100644 --- a/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap @@ -4,10 +4,10 @@ [ [ { - "id": "plink_simulated" + "id": "plink_simulated_dense.part_1_1" }, - "plink_simulated_sp.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9", - "plink_simulated_sp.grm.sp:md5,1b78fe4b14c8690943d7687dd22ba85a" + "plink_simulated_dense.part_1_1_sp.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9", + "plink_simulated_dense.part_1_1_sp.grm.sp:md5,1b78fe4b14c8690943d7687dd22ba85a" ] ], { @@ -24,7 +24,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-13T14:20:58.183672595" + "timestamp": "2026-03-13T15:36:15.829559344" }, "homo_sapiens popgen - create sparse GRM - stub": { "content": [ @@ -32,10 
+32,10 @@ "0": [ [ { - "id": "plink_simulated" + "id": "plink_simulated_dense.part_1_1" }, - "plink_simulated_sp.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", - "plink_simulated_sp.grm.sp:md5,d41d8cd98f00b204e9800998ecf8427e" + "plink_simulated_dense.part_1_1_sp.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_dense.part_1_1_sp.grm.sp:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "1": [ @@ -48,10 +48,10 @@ "sparse_grm_files": [ [ { - "id": "plink_simulated" + "id": "plink_simulated_dense.part_1_1" }, - "plink_simulated_sp.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", - "plink_simulated_sp.grm.sp:md5,d41d8cd98f00b204e9800998ecf8427e" + "plink_simulated_dense.part_1_1_sp.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_dense.part_1_1_sp.grm.sp:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "versions_gcta": [ @@ -67,6 +67,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-13T14:14:16.97479623" + "timestamp": "2026-03-13T15:36:22.049066879" } } \ No newline at end of file diff --git a/modules/nf-core/gcta/reml/tests/main.nf.test b/modules/nf-core/gcta/reml/tests/main.nf.test index b6365f5b15b5..274a986f35e8 100644 --- a/modules/nf-core/gcta/reml/tests/main.nf.test +++ b/modules/nf-core/gcta/reml/tests/main.nf.test @@ -8,30 +8,91 @@ nextflow_process { tag "modules_nfcore" tag "gcta" tag "gcta/reml" + tag "gcta/makegrmpart" + tag "gawk" - test("homo_sapiens popgen - quantitative phenotype with dense GRM and covariates") { - config "./nextflow.config" - when { + setup { + run("GAWK", alias: "GAWK_QUANTITATIVE_PHENO") { + script "../../../gawk/main.nf" process { """ input[0] = [ - [ id:"QuantitativeTrait" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_quantitative.noheader.txt", checkIfExists: true) + [ id:'QuantitativeTrait' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) ] - input[1] = [ 
- [ id:"plink_simulated" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.id", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.bin", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.N.bin", checkIfExists: true) + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$3 }').collectFile(name:'quantitative_phenotype.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_QUANTITATIVE_COVARIATES") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'covariates_quant' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) ] - input[2] = [ - [ id:"covariates_quant" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_covariates.quant.noheader.txt", checkIfExists: true) + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$4, \$5, \$6 }').collectFile(name:'quantitative_covariates.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_CATEGORICAL_COVARIATES") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'covariates_cat' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) ] - input[3] = [ - [ id:"covariates_cat" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_covariates.cat.noheader.txt", checkIfExists: true) + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$3 }').collectFile(name:'categorical_covariates.awk') + input[2] = false + """ + } + } + + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_DENSE") { + script "../../makegrmpart/main.nf" + process { + """ + file('plink_simulated.mbfile').text = 'plink_simulated\\n' + + input[0] = [ + [ 
id:'plink_simulated_dense', part_gcta_job:1, nparts_gcta:1 ], + file('plink_simulated.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] ] + input[1] = [[ id:'all_variants' ], []] + """ + } + } + } + + test("homo_sapiens popgen - quantitative phenotype with dense GRM and covariates") { + config "./nextflow.config" + when { + process { + """ + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = GAWK_QUANTITATIVE_PHENO.out.output + input[1] = dense_grm + input[2] = GAWK_QUANTITATIVE_COVARIATES.out.output + input[3] = GAWK_CATEGORICAL_COVARIATES.out.output """ } } @@ -58,18 +119,15 @@ nextflow_process { when { process { """ - input[0] = [ - [ id:"QuantitativeTrait" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_quantitative.noheader.txt", checkIfExists: true) - ] - input[1] = [ - [ id:"plink_simulated" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.id", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.bin", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.N.bin", checkIfExists: true) - ] - input[2] = [[ id:"covariates_quant" ], []] - input[3] = [[ id:"covariates_cat" ], []] + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + 
meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = GAWK_QUANTITATIVE_PHENO.out.output + input[1] = dense_grm + input[2] = [[ id:'covariates_quant' ], []] + input[3] = [[ id:'covariates_cat' ], []] """ } } diff --git a/modules/nf-core/gcta/reml/tests/main.nf.test.snap b/modules/nf-core/gcta/reml/tests/main.nf.test.snap index 01cd4a9f7fb7..7eef71fdd6eb 100644 --- a/modules/nf-core/gcta/reml/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/reml/tests/main.nf.test.snap @@ -47,7 +47,7 @@ { "id": "QuantitativeTrait" }, - "QuantitativeTrait.hsq:md5,d52dd6a71f21ec446d5dcaefb15b1d8e" + "QuantitativeTrait.hsq:md5,43dbe0c6efdafaaf2a19819c9a47e2d5" ] ], { @@ -64,6 +64,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-13T14:21:15.797101079" + "timestamp": "2026-03-13T15:36:28.705452813" } } \ No newline at end of file diff --git a/modules/nf-core/gcta/remlldms/tests/main.nf.test b/modules/nf-core/gcta/remlldms/tests/main.nf.test index 4efd400ef0b4..02ce8f2bcd56 100644 --- a/modules/nf-core/gcta/remlldms/tests/main.nf.test +++ b/modules/nf-core/gcta/remlldms/tests/main.nf.test @@ -8,36 +8,150 @@ nextflow_process { tag "modules_nfcore" tag "gcta" tag "gcta/remlldms" + tag "gcta/makegrmpart" + tag "gawk" - test("homo_sapiens popgen - quantitative phenotype with ldms mgrm and covariates") { - config "./nextflow.config" - when { + setup { + run("GAWK", alias: "GAWK_QUANTITATIVE_PHENO") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'QuantitativeTrait' ], + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.fam', checkIfExists: true) + ] + input[1] = Channel.of('{ print \$1, \$2, ((NR % 11) - 5) + (((NR * NR) % 7) / 10.0) }').collectFile(name:'quantitative_phenotype.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_QUANTITATIVE_COVARIATES") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ 
id:'covariates_quant' ], + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.fam', checkIfExists: true) + ] + input[1] = Channel.of('{ print \$1, \$2, ((NR % 9) - 4) / 3.0 }').collectFile(name:'quantitative_covariates.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_CATEGORICAL_COVARIATES") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'covariates_cat' ], + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.fam', checkIfExists: true) + ] + input[1] = Channel.of('{ print \$1, \$2, (NR % 2) + 1 }').collectFile(name:'categorical_covariates.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_LDMS1_VARIANTS") { + script "../../../gawk/main.nf" process { """ input[0] = [ - [ id:"QuantitativeTrait" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_quantitative.noheader.txt", checkIfExists: true) + [ id:'bfile_ldms1_variants' ], + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.bim', checkIfExists: true) ] - input[1] = [ - [ id:"plink_simulated_ldms" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms.mgrm", checkIfExists: true), + input[1] = Channel.of('NR <= 800 { print \$2 }').collectFile(name:'ldms1_variants.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_LDMS2_VARIANTS") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'bfile_ldms2_variants' ], + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.bim', checkIfExists: true) + ] + input[1] = Channel.of('NR > 800 { print \$2 }').collectFile(name:'ldms2_variants.awk') + input[2] = false + """ + } + } + + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_LDMS1") { + script 
"../../makegrmpart/main.nf" + process { + """ + file('bfile.mbfile').text = 'bfile\\n' + + input[0] = [ + [ id:'bfile_ldms1', part_gcta_job:1, nparts_gcta:1 ], + file('bfile.mbfile'), + [ + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.bed', checkIfExists: true) + ], [ - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms1.grm.id", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms1.grm.bin", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms1.grm.N.bin", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms2.grm.id", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms2.grm.bin", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms2.grm.N.bin", checkIfExists: true) + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.bim', checkIfExists: true) + ], + [ + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.fam', checkIfExists: true) ] ] - input[2] = [ - [ id:"covariates_quant" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_covariates.quant.noheader.txt", checkIfExists: true) - ] - input[3] = [ - [ id:"covariates_cat" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_covariates.cat.noheader.txt", checkIfExists: true) + input[1] = GAWK_LDMS1_VARIANTS.out.output + """ + } + } + + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_LDMS2") { + script "../../makegrmpart/main.nf" + process { + """ + file('bfile.mbfile').text = 'bfile\\n' + + 
input[0] = [ + [ id:'bfile_ldms2', part_gcta_job:1, nparts_gcta:1 ], + file('bfile.mbfile'), + [ + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.bed', checkIfExists: true) + ], + [ + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.bim', checkIfExists: true) + ], + [ + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.fam', checkIfExists: true) + ] ] + input[1] = GAWK_LDMS2_VARIANTS.out.output + """ + } + } + } + + test("homo_sapiens gsmr - quantitative phenotype with ldms mgrm and covariates") { + config "./nextflow.config" + when { + process { + """ + mgrm_file = Channel + .of('bfile_ldms1.part_1_1\\nbfile_ldms2.part_1_1') + .collectFile(name:'bfile_ldms.mgrm', newLine: true) + + ldms_grm_files = GCTA_MAKEGRMPART_LDMS1.out.grm_files + .mix(GCTA_MAKEGRMPART_LDMS2.out.grm_files) + .map { meta, grm_id, grm_bin, grm_n_bin -> [grm_id, grm_bin, grm_n_bin] } + .collect() + .map { rows -> rows.flatten() } + + input[0] = GAWK_QUANTITATIVE_PHENO.out.output + input[1] = mgrm_file + .combine(ldms_grm_files) + .map { row -> [[ id:'bfile_ldms' ], row[0], row[1..-1]] } + input[2] = GAWK_QUANTITATIVE_COVARIATES.out.output + input[3] = GAWK_CATEGORICAL_COVARIATES.out.output """ } } @@ -57,31 +171,29 @@ nextflow_process { } } - test("homo_sapiens popgen - quantitative phenotype with ldms mgrm - stub") { + test("homo_sapiens gsmr - quantitative phenotype with ldms mgrm - stub") { options "-stub" config "./nextflow.config" when { process { """ - input[0] = [ - [ id:"QuantitativeTrait" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_quantitative.noheader.txt", checkIfExists: true) - ] - input[1] = [ - [ id:"plink_simulated_ldms" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms.mgrm", checkIfExists: 
true), - [ - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms1.grm.id", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms1.grm.bin", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms1.grm.N.bin", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms2.grm.id", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms2.grm.bin", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated_ldms2.grm.N.bin", checkIfExists: true) - ] - ] - input[2] = [[ id:"covariates_quant" ], []] - input[3] = [[ id:"covariates_cat" ], []] + mgrm_file = Channel + .of('bfile_ldms1.part_1_1\\nbfile_ldms2.part_1_1') + .collectFile(name:'bfile_ldms.mgrm', newLine: true) + + ldms_grm_files = GCTA_MAKEGRMPART_LDMS1.out.grm_files + .mix(GCTA_MAKEGRMPART_LDMS2.out.grm_files) + .map { meta, grm_id, grm_bin, grm_n_bin -> [grm_id, grm_bin, grm_n_bin] } + .collect() + .map { rows -> rows.flatten() } + + input[0] = GAWK_QUANTITATIVE_PHENO.out.output + input[1] = mgrm_file + .combine(ldms_grm_files) + .map { row -> [[ id:'bfile_ldms' ], row[0], row[1..-1]] } + input[2] = [[ id:'covariates_quant' ], []] + input[3] = [[ id:'covariates_cat' ], []] """ } } diff --git a/modules/nf-core/gcta/remlldms/tests/main.nf.test.snap b/modules/nf-core/gcta/remlldms/tests/main.nf.test.snap index eb76250540e4..77349463ed82 100644 --- a/modules/nf-core/gcta/remlldms/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/remlldms/tests/main.nf.test.snap @@ -1,31 +1,5 @@ { - "homo_sapiens popgen - quantitative phenotype with ldms mgrm and covariates": { - "content": [ - [ - [ - { - "id": "QuantitativeTrait" - }, - "QuantitativeTrait.hsq:md5,fd2cbee5b278ec127dbbca43abee5810" - ] - ], - 
{ - "versions_gcta": [ - [ - "GCTA_REMLLDMS", - "gcta", - "*******************************************************************" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.4" - }, - "timestamp": "2026-03-13T14:21:30.18552419" - }, - "homo_sapiens popgen - quantitative phenotype with ldms mgrm - stub": { + "homo_sapiens gsmr - quantitative phenotype with ldms mgrm - stub": { "content": [ { "0": [ @@ -64,6 +38,32 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-13T14:14:59.682890873" + "timestamp": "2026-03-13T15:57:32.371358163" + }, + "homo_sapiens gsmr - quantitative phenotype with ldms mgrm and covariates": { + "content": [ + [ + [ + { + "id": "QuantitativeTrait" + }, + "QuantitativeTrait.hsq:md5,3a0b00fba467f22b6090f0233188856a" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_REMLLDMS", + "gcta", + "*******************************************************************" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T15:57:18.520897746" } -} \ No newline at end of file +} diff --git a/modules/nf-core/gcta/removerelatedsubjects/tests/main.nf.test b/modules/nf-core/gcta/removerelatedsubjects/tests/main.nf.test index 52ad6ef3cd09..2716ea7a9970 100644 --- a/modules/nf-core/gcta/removerelatedsubjects/tests/main.nf.test +++ b/modules/nf-core/gcta/removerelatedsubjects/tests/main.nf.test @@ -8,6 +8,33 @@ nextflow_process { tag "modules_nfcore" tag "gcta" tag "gcta/removerelatedsubjects" + tag "gcta/makegrmpart" + + setup { + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_DENSE") { + script "../../makegrmpart/main.nf" + process { + """ + file('plink_simulated.mbfile').text = 'plink_simulated\\n' + + input[0] = [ + [ id:'plink_simulated_dense', part_gcta_job:1, nparts_gcta:1 ], + file('plink_simulated.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + ] + input[1] = [[ id:'all_variants' ], []] + """ + } + } + } test("homo_sapiens popgen - remove related individuals from dense GRM") { config "./nextflow.config" @@ -15,12 +42,12 @@ nextflow_process { when { process { """ - input[0] = [ - [ id:"plink_simulated" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.id", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.bin", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.N.bin", checkIfExists: true) - ] + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = dense_grm """ } } @@ -30,7 +57,7 @@ nextflow_process { { assert process.success }, { assert process.out.grm_files.size() == 1 }, { assert process.out.keep_file.size() == 1 }, - { assert process.out.grm_files.get(0).get(0).id == "plink_simulated" }, + { assert process.out.grm_files.get(0).get(0).id == "plink_simulated_dense.part_1_1" }, { assert snapshot( process.out.grm_files, @@ -49,12 +76,12 @@ nextflow_process { when { process { """ - input[0] = [ - [ id:"plink_simulated" ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.id", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.bin", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.grm.N.bin", checkIfExists: true) - ] + dense_grm = 
GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = dense_grm """ } } diff --git a/modules/nf-core/gcta/removerelatedsubjects/tests/main.nf.test.snap b/modules/nf-core/gcta/removerelatedsubjects/tests/main.nf.test.snap index 6b0831a1c210..cb84a8ad5065 100644 --- a/modules/nf-core/gcta/removerelatedsubjects/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/removerelatedsubjects/tests/main.nf.test.snap @@ -4,19 +4,19 @@ [ [ { - "id": "plink_simulated" + "id": "plink_simulated_dense.part_1_1" }, - "plink_simulated_unrel05.grm.id:md5,ca8c0bded6951fdd3bf0dddc97b6df6b", - "plink_simulated_unrel05.grm.bin:md5,b1f124463eecbae86840a6651eec372d", - "plink_simulated_unrel05.grm.N.bin:md5,06b73ea8bae8f1e5f5d4de33dbd2c75e" + "plink_simulated_dense.part_1_1_unrel05.grm.id:md5,ca8c0bded6951fdd3bf0dddc97b6df6b", + "plink_simulated_dense.part_1_1_unrel05.grm.bin:md5,b1f124463eecbae86840a6651eec372d", + "plink_simulated_dense.part_1_1_unrel05.grm.N.bin:md5,06b73ea8bae8f1e5f5d4de33dbd2c75e" ] ], [ [ { - "id": "plink_simulated" + "id": "plink_simulated_dense.part_1_1" }, - "plink_simulated_unrel05.grm.id:md5,ca8c0bded6951fdd3bf0dddc97b6df6b" + "plink_simulated_dense.part_1_1_unrel05.grm.id:md5,ca8c0bded6951fdd3bf0dddc97b6df6b" ] ], { @@ -33,7 +33,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-13T14:21:44.229406402" + "timestamp": "2026-03-13T15:36:41.961471309" }, "homo_sapiens popgen - remove related individuals from dense GRM - stub": { "content": [ @@ -41,19 +41,19 @@ "0": [ [ { - "id": "plink_simulated" + "id": "plink_simulated_dense.part_1_1" }, - "plink_simulated_unrel05.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", - "plink_simulated_unrel05.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", - "plink_simulated_unrel05.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + 
"plink_simulated_dense.part_1_1_unrel05.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_dense.part_1_1_unrel05.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_dense.part_1_1_unrel05.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "1": [ [ { - "id": "plink_simulated" + "id": "plink_simulated_dense.part_1_1" }, - "plink_simulated_unrel05.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e" + "plink_simulated_dense.part_1_1_unrel05.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "2": [ @@ -66,19 +66,19 @@ "grm_files": [ [ { - "id": "plink_simulated" + "id": "plink_simulated_dense.part_1_1" }, - "plink_simulated_unrel05.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", - "plink_simulated_unrel05.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", - "plink_simulated_unrel05.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + "plink_simulated_dense.part_1_1_unrel05.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_dense.part_1_1_unrel05.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_dense.part_1_1_unrel05.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "keep_file": [ [ { - "id": "plink_simulated" + "id": "plink_simulated_dense.part_1_1" }, - "plink_simulated_unrel05.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e" + "plink_simulated_dense.part_1_1_unrel05.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "versions_gcta": [ @@ -94,6 +94,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-13T14:15:17.5409981" + "timestamp": "2026-03-13T15:36:48.571305038" } } \ No newline at end of file From 689f6e9095d693c7ae6a1a406761c5da615e3561 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Fri, 13 Mar 2026 22:01:45 +0800 Subject: [PATCH 3/9] feat: refine gcta modules and tests --- modules/nf-core/gcta/addgrms/main.nf | 4 +- modules/nf-core/gcta/adjustgrm/main.nf | 4 +- modules/nf-core/gcta/bivariatereml/main.nf | 4 +- .../nf-core/gcta/bivariateremlldms/main.nf | 4 +- 
.../gcta/calculateldscores/environment.yml | 1 + .../nf-core/gcta/calculateldscores/main.nf | 53 ++++++------- .../nf-core/gcta/calculateldscores/meta.yml | 5 ++ .../gcta/calculateldscores/tests/main.nf.test | 14 ++-- .../calculateldscores/tests/main.nf.test.snap | 2 +- .../calculateldscores/tests/nextflow.config | 1 - modules/nf-core/gcta/fastgwa/main.nf | 13 ++-- modules/nf-core/gcta/fastgwa/meta.yml | 13 +++- .../nf-core/gcta/fastgwa/tests/main.nf.test | 77 +++++++++++++++++-- .../gcta/fastgwa/tests/main.nf.test.snap | 47 +++++++++-- .../gcta/fastgwa/tests/nextflow.config | 6 -- .../nf-core/gcta/filtergrmwithkeep/main.nf | 4 +- modules/nf-core/gcta/makebksparse/main.nf | 4 +- modules/nf-core/gcta/makegrmpart/main.nf | 4 +- modules/nf-core/gcta/reml/main.nf | 4 +- modules/nf-core/gcta/remlldms/main.nf | 4 +- .../nf-core/gcta/remlldms/tests/main.nf.test | 22 +++--- .../gcta/removerelatedsubjects/main.nf | 4 +- 22 files changed, 197 insertions(+), 97 deletions(-) diff --git a/modules/nf-core/gcta/addgrms/main.nf b/modules/nf-core/gcta/addgrms/main.nf index 95e13c207611..22fb49319ff8 100644 --- a/modules/nf-core/gcta/addgrms/main.nf +++ b/modules/nf-core/gcta/addgrms/main.nf @@ -3,8 +3,8 @@ process GCTA_ADDGRMS { label 'process_medium' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gcta:1.94.1--h9ee0642_0' : - 'biocontainers/gcta:1.94.1--h9ee0642_0' }" + 'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' : + 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }" input: tuple val(meta), path(mgrm_file), path(grm_files) diff --git a/modules/nf-core/gcta/adjustgrm/main.nf b/modules/nf-core/gcta/adjustgrm/main.nf index 28d1493f242e..ced05e67a134 100644 --- a/modules/nf-core/gcta/adjustgrm/main.nf +++ b/modules/nf-core/gcta/adjustgrm/main.nf @@ -3,8 +3,8 @@ process GCTA_ADJUSTGRM { label 'process_medium' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gcta:1.94.1--h9ee0642_0' : - 'biocontainers/gcta:1.94.1--h9ee0642_0' }" + 'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' : + 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }" input: tuple val(meta), path(grm_id), path(grm_bin), path(grm_n_bin) diff --git a/modules/nf-core/gcta/bivariatereml/main.nf b/modules/nf-core/gcta/bivariatereml/main.nf index 51c1164aba7f..9bf4829b6239 100644 --- a/modules/nf-core/gcta/bivariatereml/main.nf +++ b/modules/nf-core/gcta/bivariatereml/main.nf @@ -3,8 +3,8 @@ process GCTA_BIVARIATEREML { label 'process_medium' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gcta:1.94.1--h9ee0642_0' : - 'biocontainers/gcta:1.94.1--h9ee0642_0' }" + 'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' : + 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }" input: tuple val(meta), path(phenotype_file) diff --git a/modules/nf-core/gcta/bivariateremlldms/main.nf b/modules/nf-core/gcta/bivariateremlldms/main.nf index 9de71dba4a2a..6d706efbca8d 100644 --- a/modules/nf-core/gcta/bivariateremlldms/main.nf +++ b/modules/nf-core/gcta/bivariateremlldms/main.nf @@ -3,8 +3,8 @@ process GCTA_BIVARIATEREMLLDMS { label 'process_medium' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gcta:1.94.1--h9ee0642_0' : - 'biocontainers/gcta:1.94.1--h9ee0642_0' }" + 'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' : + 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }" input: tuple val(meta), path(phenotype_file) diff --git a/modules/nf-core/gcta/calculateldscores/environment.yml b/modules/nf-core/gcta/calculateldscores/environment.yml index 3e22ea7b9f20..c1556bf88999 100644 --- a/modules/nf-core/gcta/calculateldscores/environment.yml +++ b/modules/nf-core/gcta/calculateldscores/environment.yml @@ -5,3 +5,4 @@ channels: - bioconda dependencies: - bioconda::gcta=1.94.1 + - conda-forge::r-base=4.3.1 diff --git a/modules/nf-core/gcta/calculateldscores/main.nf b/modules/nf-core/gcta/calculateldscores/main.nf index 48770b12941d..3c37acd23183 100644 --- a/modules/nf-core/gcta/calculateldscores/main.nf +++ b/modules/nf-core/gcta/calculateldscores/main.nf @@ -3,11 +3,12 @@ process GCTA_CALCULATELDSCORES { label 'process_medium' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gcta:1.94.1--h9ee0642_0' : - 'biocontainers/gcta:1.94.1--h9ee0642_0' }" + 'docker://community.wave.seqera.io/library/gcta_r-base:31127c93877b38de' : + 'community.wave.seqera.io/library/gcta_r-base:31127c93877b38de' }" input: tuple val(meta), path(bed), path(bim), path(fam) + val ld_score_region output: tuple val(meta), path("${meta.id}_gcta_ld.score.ld"), emit: ld_scores @@ -19,7 +20,6 @@ process GCTA_CALCULATELDSCORES { script: def extra_args = task.ext.args ?: '' - def ld_score_region = task.ext.ld_score_region ?: 200 """ set -euo pipefail @@ -30,38 +30,29 @@ process GCTA_CALCULATELDSCORES { --out ${meta.id}_gcta_ld \\ --thread-num ${task.cpus} ${extra_args} - ld_file="${meta.id}_gcta_ld.score.ld" - sorted_file="ldscore.sorted.tsv" + Rscript - ${meta.id}_gcta_ld.score.ld ${meta.id} <<'EOF' + args <- commandArgs(trailingOnly = TRUE) + filename <- args[1] + out_prefix <- args[2] - awk 'NR > 1 { print \$1 "\\t" \$8 }' "${meta.id}_gcta_ld.score.ld" | sort -k2,2n > "\${sorted_file}" + lds_seg <- read.table( + filename, + header = TRUE, + colClasses = c("character", rep("numeric", 8)) + ) - count=\$(wc -l < "\${sorted_file}") - q1_idx=\$(( (count + 3) / 4 )) - q2_idx=\$(( (count + 1) / 2 )) - q3_idx=\$(( (3 * count + 1) / 4 )) + quartiles <- summary(lds_seg\$ldscore_SNP) - q1=\$(awk -v idx="\${q1_idx}" 'NR == idx { print \$2 }' "\${sorted_file}") - q2=\$(awk -v idx="\${q2_idx}" 'NR == idx { print \$2 }' "\${sorted_file}") - q3=\$(awk -v idx="\${q3_idx}" 'NR == idx { print \$2 }' "\${sorted_file}") + lb1 <- which(lds_seg\$ldscore_SNP <= quartiles[2]) + lb2 <- which(lds_seg\$ldscore_SNP > quartiles[2] & lds_seg\$ldscore_SNP <= quartiles[3]) + lb3 <- which(lds_seg\$ldscore_SNP > quartiles[3] & lds_seg\$ldscore_SNP <= quartiles[5]) + lb4 <- which(lds_seg\$ldscore_SNP > quartiles[5]) - : > "${meta.id}_snp_group1.txt" - : > "${meta.id}_snp_group2.txt" - : > "${meta.id}_snp_group3.txt" - : > "${meta.id}_snp_group4.txt" - - awk -v 
q1="\${q1}" -v q2="\${q2}" -v q3="\${q3}" -v prefix="${meta.id}" ' - NR > 1 { - if (\$8 <= q1) { - print \$1 >> prefix "_snp_group1.txt" - } else if (\$8 <= q2) { - print \$1 >> prefix "_snp_group2.txt" - } else if (\$8 <= q3) { - print \$1 >> prefix "_snp_group3.txt" - } else { - print \$1 >> prefix "_snp_group4.txt" - } - } - ' "\${ld_file}" + write.table(lds_seg\$SNP[lb1], paste(out_prefix, "snp_group1.txt", sep = "_"), row.names = FALSE, quote = FALSE, col.names = FALSE, append = TRUE) + write.table(lds_seg\$SNP[lb2], paste(out_prefix, "snp_group2.txt", sep = "_"), row.names = FALSE, quote = FALSE, col.names = FALSE, append = TRUE) + write.table(lds_seg\$SNP[lb3], paste(out_prefix, "snp_group3.txt", sep = "_"), row.names = FALSE, quote = FALSE, col.names = FALSE, append = TRUE) + write.table(lds_seg\$SNP[lb4], paste(out_prefix, "snp_group4.txt", sep = "_"), row.names = FALSE, quote = FALSE, col.names = FALSE, append = TRUE) + EOF """ stub: diff --git a/modules/nf-core/gcta/calculateldscores/meta.yml b/modules/nf-core/gcta/calculateldscores/meta.yml index 16206208802d..725948766c5b 100644 --- a/modules/nf-core/gcta/calculateldscores/meta.yml +++ b/modules/nf-core/gcta/calculateldscores/meta.yml @@ -35,6 +35,11 @@ input: description: PLINK1 sample metadata file pattern: "*.{fam}" ontologies: [] + - ld_score_region: + type: integer + description: | + LD-score region width passed to `--ld-score-region`. + Callers should pass `200` for the default GCTA region width unless they are intentionally overriding it. 
output: ld_scores: - - meta: diff --git a/modules/nf-core/gcta/calculateldscores/tests/main.nf.test b/modules/nf-core/gcta/calculateldscores/tests/main.nf.test index fc0c18b08058..bd28c7a09e0d 100644 --- a/modules/nf-core/gcta/calculateldscores/tests/main.nf.test +++ b/modules/nf-core/gcta/calculateldscores/tests/main.nf.test @@ -18,10 +18,11 @@ nextflow_process { """ input[0] = [ [ id:'bfile' ], - file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.bed', checkIfExists: true), - file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.bim', checkIfExists: true), - file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.fam', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/gsmr/bfile/bfile.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/gsmr/bfile/bfile.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/gsmr/bfile/bfile.fam', checkIfExists: true) ] + input[1] = 50 """ } } @@ -56,10 +57,11 @@ nextflow_process { """ input[0] = [ [ id:'bfile' ], - file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.bed', checkIfExists: true), - file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.bim', checkIfExists: true), - file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.fam', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/gsmr/bfile/bfile.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/gsmr/bfile/bfile.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/gsmr/bfile/bfile.fam', checkIfExists: true) ] + input[1] = 200 """ } } diff --git a/modules/nf-core/gcta/calculateldscores/tests/main.nf.test.snap b/modules/nf-core/gcta/calculateldscores/tests/main.nf.test.snap index 5ce5aafa7a90..441aec6cda44 100644 --- a/modules/nf-core/gcta/calculateldscores/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/calculateldscores/tests/main.nf.test.snap @@ -23,7 +23,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-13T16:00:56.198048546" + "timestamp": "2026-03-13T21:45:20.766416151" }, "homo_sapiens gsmr - plink1 - stub": { "content": [ diff --git a/modules/nf-core/gcta/calculateldscores/tests/nextflow.config b/modules/nf-core/gcta/calculateldscores/tests/nextflow.config index c34dbbbbd82d..80c45aadd02f 100644 --- a/modules/nf-core/gcta/calculateldscores/tests/nextflow.config +++ b/modules/nf-core/gcta/calculateldscores/tests/nextflow.config @@ -5,6 +5,5 @@ params { process { withName: "GCTA_CALCULATELDSCORES" { cpus = 1 - ext.ld_score_region = 50 } } diff --git a/modules/nf-core/gcta/fastgwa/main.nf b/modules/nf-core/gcta/fastgwa/main.nf index c488883ea0b7..7895be0a5a40 100644 --- a/modules/nf-core/gcta/fastgwa/main.nf +++ b/modules/nf-core/gcta/fastgwa/main.nf @@ -3,8 +3,8 @@ process GCTA_FASTGWA { label 'process_medium' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gcta:1.94.1--h9ee0642_0' : - 'biocontainers/gcta:1.94.1--h9ee0642_0' }" + 'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' : + 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }" input: tuple val(meta), path(bed_pgen), path(bim_pvar), path(fam_psam) @@ -12,6 +12,7 @@ process GCTA_FASTGWA { tuple val(meta3), path(phenotype_file) tuple val(meta4), path(quant_covariates_file) tuple val(meta5), path(cat_covariates_file) + val mlm_exact output: tuple val(meta), val(meta3), path("${meta.id}_${meta3.id}.fastGWA"), emit: results @@ -24,20 +25,20 @@ process GCTA_FASTGWA { def qcovar_arg = quant_covariates_file ? "--qcovar ${quant_covariates_file}" : '' def covar_arg = cat_covariates_file ? "--covar ${cat_covariates_file}" : '' def mpheno_arg = meta3.mpheno ? "--mpheno ${meta3.mpheno}" : '' - def grm_prefix = meta2.id + def grm_arg = meta3.is_binary ? '' : "--grm-sparse ${meta2.id}" def genotype_suffix = bed_pgen.name.tokenize('.').last() def genotype_flag = genotype_suffix == 'pgen' ? '--pfile' : '--bfile' - def genotype_prefix = bed_pgen.name.replaceFirst(/\.(bed|pgen)$/, '') + def genotype_prefix = meta.id def out = "${meta.id}_${meta3.id}" def extra_args = task.ext.args ?: '' - def mode_arg = extra_args.contains('--fastGWA-mlm-exact') || extra_args.contains('--fastGWA-lr') ? '' : '--fastGWA-mlm' + def mode_arg = meta3.is_binary ? '--fastGWA-lr' : (mlm_exact ? '--fastGWA-mlm-exact' : '--fastGWA-mlm') """ set -euo pipefail gcta \\ ${genotype_flag} ${genotype_prefix} \\ - --grm-sparse ${grm_prefix} \\ + ${grm_arg} \\ ${mode_arg} \\ --pheno ${phenotype_file} \\ ${qcovar_arg} \\ diff --git a/modules/nf-core/gcta/fastgwa/meta.yml b/modules/nf-core/gcta/fastgwa/meta.yml index ab66bad26a32..e6f2cd00e336 100644 --- a/modules/nf-core/gcta/fastgwa/meta.yml +++ b/modules/nf-core/gcta/fastgwa/meta.yml @@ -39,21 +39,23 @@ input: description: | Groovy map containing sparse GRM metadata e.g. 
`[ id:'plink_simulated_sp' ]` + Used for non-binary fastGWA MLM modes and ignored when `meta3.is_binary` is true - sparse_grm_id: type: file - description: Sparse GRM ID file (`.grm.id`) + description: Sparse GRM ID file (`.grm.id`), ignored when `meta3.is_binary` is true pattern: "*.grm.id" ontologies: [] - sparse_grm_sp: type: file - description: Sparse GRM sparse matrix file (`.grm.sp`) + description: Sparse GRM sparse matrix file (`.grm.sp`), ignored when `meta3.is_binary` is true pattern: "*.grm.sp" ontologies: [] - - meta3: type: map description: | Groovy map containing phenotype metadata - e.g. `[ id:'QuantitativeTrait' ]` + e.g. `[ id:'QuantitativeTrait', is_binary:false ]` + `meta3.is_binary` is required and selects logistic vs MLM fastGWA mode Optional phenotype selector may be supplied as `meta3.mpheno` - phenotype_file: type: file @@ -83,6 +85,11 @@ input: pattern: "*.{covar,cov,txt,tsv}" ontologies: - edam: "http://edamontology.org/format_3475" + - mlm_exact: + type: boolean + description: | + Apply `--fastGWA-mlm-exact` for non-binary phenotypes. + Ignored when `meta3.is_binary` is true because binary phenotypes use `--fastGWA-lr`. 
output: results: - - meta: diff --git a/modules/nf-core/gcta/fastgwa/tests/main.nf.test b/modules/nf-core/gcta/fastgwa/tests/main.nf.test index 5c2a36d578cd..6889d8a002d1 100644 --- a/modules/nf-core/gcta/fastgwa/tests/main.nf.test +++ b/modules/nf-core/gcta/fastgwa/tests/main.nf.test @@ -11,6 +11,7 @@ nextflow_process { tag "gcta/makegrmpart" tag "gcta/makebksparse" tag "gawk" + config "./nextflow.config" setup { run("GAWK", alias: "GAWK_QUANTITATIVE_PHENO") { @@ -27,6 +28,20 @@ nextflow_process { } } + run("GAWK", alias: "GAWK_BINARY_PHENO") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'BinaryTrait' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_binary_phenoname.phe', checkIfExists: true) + ] + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$3 }').collectFile(name:'binary_phenotype.awk') + input[2] = false + """ + } + } + run("GAWK", alias: "GAWK_QUANTITATIVE_COVARIATES") { script "../../../gawk/main.nf" process { @@ -95,8 +110,7 @@ nextflow_process { } } - test("homo_sapiens popgen - plink1 with sparse GRM and covariates") { - config "./nextflow.config" + test("homo_sapiens popgen - plink1 with sparse GRM and quantitative phenotype") { when { process { """ @@ -104,6 +118,10 @@ nextflow_process { [[ id:meta.id + '_sp' ], sparse_grm_id, sparse_grm_sp] } + quantitative_pheno = GAWK_QUANTITATIVE_PHENO.out.output.map { meta, phenotype_file -> + [[ id:meta.id, is_binary:false ], phenotype_file] + } + input[0] = [ [ id:"plink_simulated" ], file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bed", checkIfExists: true), @@ -111,9 +129,10 @@ nextflow_process { file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.fam", checkIfExists: true) ] input[1] = sparse_grm - input[2] = GAWK_QUANTITATIVE_PHENO.out.output + input[2] = quantitative_pheno input[3] = GAWK_QUANTITATIVE_COVARIATES.out.output input[4] = 
GAWK_CATEGORICAL_COVARIATES.out.output + input[5] = true """ } } @@ -135,9 +154,52 @@ nextflow_process { } } + test("homo_sapiens popgen - plink1 with sparse GRM and binary phenotype") { + when { + process { + """ + sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_id, sparse_grm_sp -> + [[ id:meta.id + '_sp' ], sparse_grm_id, sparse_grm_sp] + } + + binary_pheno = GAWK_BINARY_PHENO.out.output.map { meta, phenotype_file -> + [[ id:meta.id, is_binary:true ], phenotype_file] + } + + input[0] = [ + [ id:"plink_simulated" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bed", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bim", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.fam", checkIfExists: true) + ] + input[1] = sparse_grm + input[2] = binary_pheno + input[3] = GAWK_QUANTITATIVE_COVARIATES.out.output + input[4] = GAWK_CATEGORICAL_COVARIATES.out.output + input[5] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.results.size() == 1 }, + { assert process.out.results.get(0).get(0).id == "plink_simulated" }, + { assert process.out.results.get(0).get(1).id == "BinaryTrait" }, + { assert path(process.out.results.get(0).get(2)).fileName.toString() == "plink_simulated_BinaryTrait.fastGWA" }, + { + assert snapshot( + process.out.results, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + test("homo_sapiens popgen - plink2 with sparse GRM - stub") { options "-stub" - config "./nextflow.config" when { process { @@ -146,6 +208,10 @@ nextflow_process { [[ id:meta.id + '_sp' ], sparse_grm_id, sparse_grm_sp] } + quantitative_pheno = GAWK_QUANTITATIVE_PHENO.out.output.map { meta, phenotype_file -> + [[ id:meta.id, is_binary:false ], phenotype_file] + } + input[0] = [ [ id:"plink_simulated" ], 
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.pgen", checkIfExists: true), @@ -153,9 +219,10 @@ nextflow_process { file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.psam", checkIfExists: true) ] input[1] = sparse_grm - input[2] = GAWK_QUANTITATIVE_PHENO.out.output + input[2] = quantitative_pheno input[3] = [[ id:'covariates_quant' ], []] input[4] = [[ id:'covariates_cat' ], []] + input[5] = false """ } } diff --git a/modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap b/modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap index 9c01c905e7fb..e94665a7d33a 100644 --- a/modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap @@ -8,7 +8,8 @@ "id": "plink_simulated" }, { - "id": "QuantitativeTrait" + "id": "QuantitativeTrait", + "is_binary": false }, "plink_simulated_QuantitativeTrait.fastGWA:md5,d41d8cd98f00b204e9800998ecf8427e" ] @@ -26,7 +27,8 @@ "id": "plink_simulated" }, { - "id": "QuantitativeTrait" + "id": "QuantitativeTrait", + "is_binary": false }, "plink_simulated_QuantitativeTrait.fastGWA:md5,d41d8cd98f00b204e9800998ecf8427e" ] @@ -44,9 +46,9 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-13T14:13:39.722374131" + "timestamp": "2026-03-13T18:15:29.679228048" }, - "homo_sapiens popgen - plink1 with sparse GRM and covariates": { + "homo_sapiens popgen - plink1 with sparse GRM and quantitative phenotype": { "content": [ [ [ @@ -54,7 +56,8 @@ "id": "plink_simulated" }, { - "id": "QuantitativeTrait" + "id": "QuantitativeTrait", + "is_binary": false }, "plink_simulated_QuantitativeTrait.fastGWA:md5,ba64c9460f412ffa7afb4060eaa029e4" ] @@ -73,6 +76,36 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-13T15:35:49.465395409" + "timestamp": "2026-03-13T18:11:54.793137401" + }, + "homo_sapiens popgen - plink1 with sparse GRM and binary phenotype": { + "content": [ + [ + [ + { + "id": 
"plink_simulated" + }, + { + "id": "BinaryTrait", + "is_binary": true + }, + "plink_simulated_BinaryTrait.fastGWA:md5,723602dcb94b8a08b3652f1491dcd2ee" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_FASTGWA", + "gcta", + "*******************************************************************" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T18:15:22.74128729" } -} \ No newline at end of file +} diff --git a/modules/nf-core/gcta/fastgwa/tests/nextflow.config b/modules/nf-core/gcta/fastgwa/tests/nextflow.config index 71a0143df3e3..de31e0218829 100644 --- a/modules/nf-core/gcta/fastgwa/tests/nextflow.config +++ b/modules/nf-core/gcta/fastgwa/tests/nextflow.config @@ -1,9 +1,3 @@ params { modules_testdata_base_path = System.getenv("NF_MODULES_TESTDATA_BASE_PATH") ?: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/" } - -process { - withName: "GCTA_FASTGWA" { - ext.args = { "--fastGWA-mlm-exact" } - } -} diff --git a/modules/nf-core/gcta/filtergrmwithkeep/main.nf b/modules/nf-core/gcta/filtergrmwithkeep/main.nf index f06fc5968a53..9701e66bc0e1 100644 --- a/modules/nf-core/gcta/filtergrmwithkeep/main.nf +++ b/modules/nf-core/gcta/filtergrmwithkeep/main.nf @@ -3,8 +3,8 @@ process GCTA_FILTERGRMWITHKEEP { label 'process_medium' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gcta:1.94.1--h9ee0642_0' : - 'biocontainers/gcta:1.94.1--h9ee0642_0' }" + 'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' : + 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }" input: tuple val(meta), path(grm_id), path(grm_bin), path(grm_n_bin) diff --git a/modules/nf-core/gcta/makebksparse/main.nf b/modules/nf-core/gcta/makebksparse/main.nf index ac8195303e08..d2cd63987795 100644 --- a/modules/nf-core/gcta/makebksparse/main.nf +++ b/modules/nf-core/gcta/makebksparse/main.nf @@ -3,8 +3,8 @@ process GCTA_MAKEBKSPARSE { label 'process_medium' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gcta:1.94.1--h9ee0642_0' : - 'biocontainers/gcta:1.94.1--h9ee0642_0' }" + 'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' : + 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }" input: tuple val(meta), path(grm_id), path(grm_bin), path(grm_n_bin) diff --git a/modules/nf-core/gcta/makegrmpart/main.nf b/modules/nf-core/gcta/makegrmpart/main.nf index 6a2552c8a5a2..b234712c1525 100644 --- a/modules/nf-core/gcta/makegrmpart/main.nf +++ b/modules/nf-core/gcta/makegrmpart/main.nf @@ -3,8 +3,8 @@ process GCTA_MAKEGRMPART { label 'process_medium' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gcta:1.94.1--h9ee0642_0' : - 'biocontainers/gcta:1.94.1--h9ee0642_0' }" + 'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' : + 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }" input: tuple val(meta), path(mfile), path(bed_pgen), path(bim_pvar), path(fam_psam) diff --git a/modules/nf-core/gcta/reml/main.nf b/modules/nf-core/gcta/reml/main.nf index f8c7b6fccb86..07f26d88f36b 100644 --- a/modules/nf-core/gcta/reml/main.nf +++ b/modules/nf-core/gcta/reml/main.nf @@ -4,8 +4,8 @@ process GCTA_REML { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gcta:1.94.1--h9ee0642_0' : - 'biocontainers/gcta:1.94.1--h9ee0642_0' }" + 'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' : + 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }" input: tuple val(meta), path(phenotypes_file) diff --git a/modules/nf-core/gcta/remlldms/main.nf b/modules/nf-core/gcta/remlldms/main.nf index 0661493da918..d3436b62f40b 100644 --- a/modules/nf-core/gcta/remlldms/main.nf +++ b/modules/nf-core/gcta/remlldms/main.nf @@ -3,8 +3,8 @@ process GCTA_REMLLDMS { label 'process_medium' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gcta:1.94.1--h9ee0642_0' : - 'biocontainers/gcta:1.94.1--h9ee0642_0' }" + 'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' : + 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }" input: tuple val(meta), path(phenotypes_file) diff --git a/modules/nf-core/gcta/remlldms/tests/main.nf.test b/modules/nf-core/gcta/remlldms/tests/main.nf.test index 02ce8f2bcd56..7cf0bc13c8af 100644 --- a/modules/nf-core/gcta/remlldms/tests/main.nf.test +++ b/modules/nf-core/gcta/remlldms/tests/main.nf.test @@ -18,7 +18,7 @@ nextflow_process { """ input[0] = [ [ id:'QuantitativeTrait' ], - file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.fam', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/gsmr/bfile/bfile.fam', checkIfExists: true) ] input[1] = Channel.of('{ print \$1, \$2, ((NR % 11) - 5) + (((NR * NR) % 7) / 10.0) }').collectFile(name:'quantitative_phenotype.awk') input[2] = false @@ -32,7 +32,7 @@ nextflow_process { """ input[0] = [ [ id:'covariates_quant' ], - file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.fam', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/gsmr/bfile/bfile.fam', checkIfExists: true) ] input[1] = Channel.of('{ print \$1, \$2, ((NR % 9) - 4) / 3.0 }').collectFile(name:'quantitative_covariates.awk') input[2] = false @@ -46,7 +46,7 @@ nextflow_process { """ input[0] = [ [ id:'covariates_cat' ], - file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.fam', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/gsmr/bfile/bfile.fam', checkIfExists: true) ] input[1] = Channel.of('{ print \$1, \$2, (NR % 2) + 1 }').collectFile(name:'categorical_covariates.awk') input[2] = false @@ -60,7 
+60,7 @@ nextflow_process { """ input[0] = [ [ id:'bfile_ldms1_variants' ], - file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.bim', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/gsmr/bfile/bfile.bim', checkIfExists: true) ] input[1] = Channel.of('NR <= 800 { print \$2 }').collectFile(name:'ldms1_variants.awk') input[2] = false @@ -74,7 +74,7 @@ nextflow_process { """ input[0] = [ [ id:'bfile_ldms2_variants' ], - file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.bim', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/gsmr/bfile/bfile.bim', checkIfExists: true) ] input[1] = Channel.of('NR > 800 { print \$2 }').collectFile(name:'ldms2_variants.awk') input[2] = false @@ -92,13 +92,13 @@ nextflow_process { [ id:'bfile_ldms1', part_gcta_job:1, nparts_gcta:1 ], file('bfile.mbfile'), [ - file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.bed', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/gsmr/bfile/bfile.bed', checkIfExists: true) ], [ - file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.bim', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/gsmr/bfile/bfile.bim', checkIfExists: true) ], [ - file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.fam', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/gsmr/bfile/bfile.fam', checkIfExists: true) ] ] input[1] = GAWK_LDMS1_VARIANTS.out.output @@ -116,13 +116,13 @@ nextflow_process { [ id:'bfile_ldms2', part_gcta_job:1, nparts_gcta:1 ], file('bfile.mbfile'), [ - 
file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.bed', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/gsmr/bfile/bfile.bed', checkIfExists: true) ], [ - file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.bim', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/gsmr/bfile/bfile.bim', checkIfExists: true) ], [ - file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.fam', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/gsmr/bfile/bfile.fam', checkIfExists: true) ] ] input[1] = GAWK_LDMS2_VARIANTS.out.output diff --git a/modules/nf-core/gcta/removerelatedsubjects/main.nf b/modules/nf-core/gcta/removerelatedsubjects/main.nf index cf81faeab04b..2214805c8ce5 100644 --- a/modules/nf-core/gcta/removerelatedsubjects/main.nf +++ b/modules/nf-core/gcta/removerelatedsubjects/main.nf @@ -3,8 +3,8 @@ process GCTA_REMOVERELATEDSUBJECTS { label 'process_medium' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gcta:1.94.1--h9ee0642_0' : - 'biocontainers/gcta:1.94.1--h9ee0642_0' }" + 'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' : + 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }" input: tuple val(meta), path(grm_id), path(grm_bin), path(grm_n_bin) From f1d8361d8c4dc6a443558a67c141518f8ca0d269 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Sat, 14 Mar 2026 23:24:03 +0800 Subject: [PATCH 4/9] test: stabilize gcta bivariateremlldms snapshot --- .../gcta/bivariateremlldms/tests/main.nf.test | 9 +++++- .../bivariateremlldms/tests/main.nf.test.snap | 28 +++++++++++++++++-- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test b/modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test index bdba09994011..b4af8b8c6d08 100644 --- a/modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test +++ b/modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test @@ -159,8 +159,15 @@ nextflow_process { { assert file(process.out.log_file.get(0).get(1)).name == "Trait1__Trait2.log" }, { assert file(process.out.log_file.get(0).get(1)).exists() }, { + def resultFile = file(process.out.bivariate_results.get(0).get(1)) assert snapshot( - process.out.bivariate_results, + [ + [ + process.out.bivariate_results.get(0).get(0), + resultFile.name, + resultFile.readLines().collect { it.tokenize()[0] } + ] + ], process.out.findAll { key, val -> key.startsWith('versions') } ).match() } diff --git a/modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test.snap b/modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test.snap index 60ce78e5668d..6289a546a78b 100644 --- a/modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test.snap @@ -6,7 +6,29 @@ { "id": "Trait1__Trait2" }, - "Trait1__Trait2.hsq:md5,fca157825307e58cb3e0423cc80bd97a" + "Trait1__Trait2.hsq", + [ + "Source", + 
"V(G1)_tr1", + "V(G1)_tr2", + "C(G1)_tr12", + "V(G2)_tr1", + "V(G2)_tr2", + "C(G2)_tr12", + "V(e)_tr1", + "V(e)_tr2", + "C(e)_tr12", + "Vp_tr1", + "Vp_tr2", + "V(G1)/Vp_tr1", + "V(G1)/Vp_tr2", + "V(G2)/Vp_tr1", + "V(G2)/Vp_tr2", + "rG1", + "rG2", + "logL", + "n" + ] ] ], { @@ -23,7 +45,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-13T15:38:31.82813107" + "timestamp": "2026-03-14T22:25:42.183313239" }, "homo_sapiens popgen - bivariate phenotype with ldms mgrm - stub": { "content": [ @@ -80,6 +102,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-13T14:08:47.444308099" + "timestamp": "2026-03-14T22:25:54.732574761" } } \ No newline at end of file From 09e8a36f96f88d7b08f2bcf62d11adf2c728cd16 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Sat, 14 Mar 2026 23:49:38 +0800 Subject: [PATCH 5/9] fix: capture gcta versions correctly --- modules/nf-core/gcta/addgrms/main.nf | 2 +- modules/nf-core/gcta/addgrms/meta.yml | 4 ++-- modules/nf-core/gcta/addgrms/tests/main.nf.test.snap | 6 +++--- modules/nf-core/gcta/adjustgrm/main.nf | 2 +- modules/nf-core/gcta/adjustgrm/meta.yml | 4 ++-- modules/nf-core/gcta/adjustgrm/tests/main.nf.test.snap | 6 +++--- modules/nf-core/gcta/bivariatereml/main.nf | 2 +- modules/nf-core/gcta/bivariatereml/meta.yml | 4 ++-- .../nf-core/gcta/bivariatereml/tests/main.nf.test.snap | 6 +++--- modules/nf-core/gcta/bivariateremlldms/main.nf | 2 +- modules/nf-core/gcta/bivariateremlldms/meta.yml | 4 ++-- .../gcta/bivariateremlldms/tests/main.nf.test.snap | 6 +++--- modules/nf-core/gcta/calculateldscores/main.nf | 2 +- modules/nf-core/gcta/calculateldscores/meta.yml | 4 ++-- .../gcta/calculateldscores/tests/main.nf.test.snap | 6 +++--- modules/nf-core/gcta/fastgwa/main.nf | 2 +- modules/nf-core/gcta/fastgwa/meta.yml | 4 ++-- modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap | 8 ++++---- modules/nf-core/gcta/filtergrmwithkeep/main.nf | 2 +- modules/nf-core/gcta/filtergrmwithkeep/meta.yml | 4 ++-- 
.../gcta/filtergrmwithkeep/tests/main.nf.test.snap | 6 +++--- modules/nf-core/gcta/makebksparse/main.nf | 2 +- modules/nf-core/gcta/makebksparse/meta.yml | 4 ++-- modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap | 6 +++--- modules/nf-core/gcta/makegrmpart/main.nf | 2 +- modules/nf-core/gcta/makegrmpart/meta.yml | 4 ++-- modules/nf-core/gcta/makegrmpart/tests/main.nf.test.snap | 6 +++--- modules/nf-core/gcta/reml/main.nf | 2 +- modules/nf-core/gcta/reml/meta.yml | 4 ++-- modules/nf-core/gcta/reml/tests/main.nf.test.snap | 6 +++--- modules/nf-core/gcta/remlldms/main.nf | 2 +- modules/nf-core/gcta/remlldms/meta.yml | 4 ++-- modules/nf-core/gcta/remlldms/tests/main.nf.test.snap | 6 +++--- modules/nf-core/gcta/removerelatedsubjects/main.nf | 2 +- modules/nf-core/gcta/removerelatedsubjects/meta.yml | 4 ++-- .../gcta/removerelatedsubjects/tests/main.nf.test.snap | 6 +++--- 36 files changed, 73 insertions(+), 73 deletions(-) diff --git a/modules/nf-core/gcta/addgrms/main.nf b/modules/nf-core/gcta/addgrms/main.nf index 22fb49319ff8..74e7c65959d7 100644 --- a/modules/nf-core/gcta/addgrms/main.nf +++ b/modules/nf-core/gcta/addgrms/main.nf @@ -11,7 +11,7 @@ process GCTA_ADDGRMS { output: tuple val(meta), path("${meta.id}.grm.id"), path("${meta.id}.grm.bin"), path("${meta.id}.grm.N.bin"), emit: combined_grm - tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | head -n 1"), emit: versions_gcta, topic: versions + tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/gcta/addgrms/meta.yml b/modules/nf-core/gcta/addgrms/meta.yml index 5e91cfd6cb9f..06b3d7afb7e6 100644 --- a/modules/nf-core/gcta/addgrms/meta.yml +++ b/modules/nf-core/gcta/addgrms/meta.yml @@ -59,7 +59,7 @@ output: - "gcta": type: string description: The tool name - - "gcta --version 2>&1 | head -n 1": 
+ - "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'": type: eval description: The command used to retrieve the GCTA version @@ -71,7 +71,7 @@ topics: - gcta: type: string description: The tool name - - gcta --version 2>&1 | head -n 1: + - gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//': type: eval description: The command used to retrieve the GCTA version diff --git a/modules/nf-core/gcta/addgrms/tests/main.nf.test.snap b/modules/nf-core/gcta/addgrms/tests/main.nf.test.snap index c25e7e06baec..74190b563549 100644 --- a/modules/nf-core/gcta/addgrms/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/addgrms/tests/main.nf.test.snap @@ -16,7 +16,7 @@ [ "GCTA_ADDGRMS", "gcta", - "*******************************************************************" + "1.94.1" ] ], "combined_grm": [ @@ -33,7 +33,7 @@ [ "GCTA_ADDGRMS", "gcta", - "*******************************************************************" + "1.94.1" ] ] } @@ -61,7 +61,7 @@ [ "GCTA_ADDGRMS", "gcta", - "*******************************************************************" + "1.94.1" ] ] } diff --git a/modules/nf-core/gcta/adjustgrm/main.nf b/modules/nf-core/gcta/adjustgrm/main.nf index ced05e67a134..9f61149355f9 100644 --- a/modules/nf-core/gcta/adjustgrm/main.nf +++ b/modules/nf-core/gcta/adjustgrm/main.nf @@ -11,7 +11,7 @@ process GCTA_ADJUSTGRM { output: tuple val(meta), path("${meta.id}_adj.grm.id"), path("${meta.id}_adj.grm.bin"), path("${meta.id}_adj.grm.N.bin"), emit: grm_files - tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | head -n 1"), emit: versions_gcta, topic: versions + tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/gcta/adjustgrm/meta.yml b/modules/nf-core/gcta/adjustgrm/meta.yml index b191a8a03f32..a8d311018281 
100644 --- a/modules/nf-core/gcta/adjustgrm/meta.yml +++ b/modules/nf-core/gcta/adjustgrm/meta.yml @@ -63,7 +63,7 @@ output: - "gcta": type: string description: The tool name - - "gcta --version 2>&1 | head -n 1": + - "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'": type: eval description: The command used to retrieve the GCTA version @@ -75,7 +75,7 @@ topics: - gcta: type: string description: The tool name - - gcta --version 2>&1 | head -n 1: + - gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//': type: eval description: The command used to retrieve the GCTA version diff --git a/modules/nf-core/gcta/adjustgrm/tests/main.nf.test.snap b/modules/nf-core/gcta/adjustgrm/tests/main.nf.test.snap index 6ed461bbd2c6..04fcb7aebad7 100644 --- a/modules/nf-core/gcta/adjustgrm/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/adjustgrm/tests/main.nf.test.snap @@ -16,7 +16,7 @@ [ "GCTA_ADJUSTGRM", "gcta", - "*******************************************************************" + "1.94.1" ] ], "grm_files": [ @@ -33,7 +33,7 @@ [ "GCTA_ADJUSTGRM", "gcta", - "*******************************************************************" + "1.94.1" ] ] } @@ -61,7 +61,7 @@ [ "GCTA_ADJUSTGRM", "gcta", - "*******************************************************************" + "1.94.1" ] ] } diff --git a/modules/nf-core/gcta/bivariatereml/main.nf b/modules/nf-core/gcta/bivariatereml/main.nf index 9bf4829b6239..039b7f295ad0 100644 --- a/modules/nf-core/gcta/bivariatereml/main.nf +++ b/modules/nf-core/gcta/bivariatereml/main.nf @@ -15,7 +15,7 @@ process GCTA_BIVARIATEREML { output: tuple val(meta), path("${meta.id}.hsq"), emit: bivariate_results tuple val(meta), path("${meta.id}.log"), emit: log_file - tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | head -n 1"), emit: versions_gcta, topic: versions + tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' 
-f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/gcta/bivariatereml/meta.yml b/modules/nf-core/gcta/bivariatereml/meta.yml index e87314bc570b..7b64fd3a392c 100644 --- a/modules/nf-core/gcta/bivariatereml/meta.yml +++ b/modules/nf-core/gcta/bivariatereml/meta.yml @@ -98,7 +98,7 @@ output: - "gcta": type: string description: The tool name - - "gcta --version 2>&1 | head -n 1": + - "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'": type: eval description: The command used to retrieve the GCTA version topics: @@ -109,7 +109,7 @@ topics: - gcta: type: string description: The tool name - - gcta --version 2>&1 | head -n 1: + - gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//': type: eval description: The command used to retrieve the GCTA version authors: diff --git a/modules/nf-core/gcta/bivariatereml/tests/main.nf.test.snap b/modules/nf-core/gcta/bivariatereml/tests/main.nf.test.snap index aeab61112211..ff68ab71f489 100644 --- a/modules/nf-core/gcta/bivariatereml/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/bivariatereml/tests/main.nf.test.snap @@ -14,7 +14,7 @@ [ "GCTA_BIVARIATEREML", "gcta", - "*******************************************************************" + "1.94.1" ] ] } @@ -48,7 +48,7 @@ [ "GCTA_BIVARIATEREML", "gcta", - "*******************************************************************" + "1.94.1" ] ], "bivariate_results": [ @@ -71,7 +71,7 @@ [ "GCTA_BIVARIATEREML", "gcta", - "*******************************************************************" + "1.94.1" ] ] } diff --git a/modules/nf-core/gcta/bivariateremlldms/main.nf b/modules/nf-core/gcta/bivariateremlldms/main.nf index 6d706efbca8d..fd34a7a5699c 100644 --- a/modules/nf-core/gcta/bivariateremlldms/main.nf +++ b/modules/nf-core/gcta/bivariateremlldms/main.nf @@ -15,7 +15,7 @@ process GCTA_BIVARIATEREMLLDMS { output: tuple val(meta), 
path("${meta.id}.hsq"), emit: bivariate_results tuple val(meta), path("${meta.id}.log"), emit: log_file - tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | head -n 1"), emit: versions_gcta, topic: versions + tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/gcta/bivariateremlldms/meta.yml b/modules/nf-core/gcta/bivariateremlldms/meta.yml index 03d3f8509889..1cfafb2a1320 100644 --- a/modules/nf-core/gcta/bivariateremlldms/meta.yml +++ b/modules/nf-core/gcta/bivariateremlldms/meta.yml @@ -95,7 +95,7 @@ output: - "gcta": type: string description: The tool name - - "gcta --version 2>&1 | head -n 1": + - "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'": type: eval description: The command used to retrieve the GCTA version topics: @@ -106,7 +106,7 @@ topics: - gcta: type: string description: The tool name - - gcta --version 2>&1 | head -n 1: + - gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//': type: eval description: The command used to retrieve the GCTA version authors: diff --git a/modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test.snap b/modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test.snap index 6289a546a78b..44d0b0b2ca71 100644 --- a/modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test.snap @@ -36,7 +36,7 @@ [ "GCTA_BIVARIATEREMLLDMS", "gcta", - "*******************************************************************" + "1.94.1" ] ] } @@ -70,7 +70,7 @@ [ "GCTA_BIVARIATEREMLLDMS", "gcta", - "*******************************************************************" + "1.94.1" ] ], "bivariate_results": [ @@ -93,7 +93,7 @@ [ "GCTA_BIVARIATEREMLLDMS", "gcta", - 
"*******************************************************************" + "1.94.1" ] ] } diff --git a/modules/nf-core/gcta/calculateldscores/main.nf b/modules/nf-core/gcta/calculateldscores/main.nf index 3c37acd23183..8ee1b4d57828 100644 --- a/modules/nf-core/gcta/calculateldscores/main.nf +++ b/modules/nf-core/gcta/calculateldscores/main.nf @@ -13,7 +13,7 @@ process GCTA_CALCULATELDSCORES { output: tuple val(meta), path("${meta.id}_gcta_ld.score.ld"), emit: ld_scores tuple val(meta), path("${meta.id}_snp_group1.txt"), path("${meta.id}_snp_group2.txt"), path("${meta.id}_snp_group3.txt"), path("${meta.id}_snp_group4.txt"), emit: snp_group_files - tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | head -n 1"), emit: versions_gcta, topic: versions + tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/gcta/calculateldscores/meta.yml b/modules/nf-core/gcta/calculateldscores/meta.yml index 725948766c5b..666d1128895b 100644 --- a/modules/nf-core/gcta/calculateldscores/meta.yml +++ b/modules/nf-core/gcta/calculateldscores/meta.yml @@ -90,7 +90,7 @@ output: - "gcta": type: string description: The tool name - - "gcta --version 2>&1 | head -n 1": + - "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'": type: eval description: The command used to generate the version of the tool topics: @@ -101,7 +101,7 @@ topics: - gcta: type: string description: The tool name - - gcta --version 2>&1 | head -n 1: + - gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//': type: eval description: The command used to generate the version of the tool authors: diff --git a/modules/nf-core/gcta/calculateldscores/tests/main.nf.test.snap b/modules/nf-core/gcta/calculateldscores/tests/main.nf.test.snap index 441aec6cda44..287c097cf2c8 
100644 --- a/modules/nf-core/gcta/calculateldscores/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/calculateldscores/tests/main.nf.test.snap @@ -14,7 +14,7 @@ [ "GCTA_CALCULATELDSCORES", "gcta", - "*******************************************************************" + "1.94.1" ] ] } @@ -51,7 +51,7 @@ [ "GCTA_CALCULATELDSCORES", "gcta", - "*******************************************************************" + "1.94.1" ] ], "ld_scores": [ @@ -77,7 +77,7 @@ [ "GCTA_CALCULATELDSCORES", "gcta", - "*******************************************************************" + "1.94.1" ] ] } diff --git a/modules/nf-core/gcta/fastgwa/main.nf b/modules/nf-core/gcta/fastgwa/main.nf index 7895be0a5a40..be40a6309be5 100644 --- a/modules/nf-core/gcta/fastgwa/main.nf +++ b/modules/nf-core/gcta/fastgwa/main.nf @@ -16,7 +16,7 @@ process GCTA_FASTGWA { output: tuple val(meta), val(meta3), path("${meta.id}_${meta3.id}.fastGWA"), emit: results - tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | head -n 1"), emit: versions_gcta, topic: versions + tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/gcta/fastgwa/meta.yml b/modules/nf-core/gcta/fastgwa/meta.yml index e6f2cd00e336..768eb1ec3189 100644 --- a/modules/nf-core/gcta/fastgwa/meta.yml +++ b/modules/nf-core/gcta/fastgwa/meta.yml @@ -115,7 +115,7 @@ output: - "gcta": type: string description: The tool name - - "gcta --version 2>&1 | head -n 1": + - "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'": type: eval description: The command used to retrieve the GCTA version topics: @@ -126,7 +126,7 @@ topics: - gcta: type: string description: The tool name - - gcta --version 2>&1 | head -n 1: + - gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//': type: eval 
description: The command used to retrieve the GCTA version authors: diff --git a/modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap b/modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap index e94665a7d33a..49e16c76984c 100644 --- a/modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap @@ -18,7 +18,7 @@ [ "GCTA_FASTGWA", "gcta", - "*******************************************************************" + "1.94.1" ] ], "results": [ @@ -37,7 +37,7 @@ [ "GCTA_FASTGWA", "gcta", - "*******************************************************************" + "1.94.1" ] ] } @@ -67,7 +67,7 @@ [ "GCTA_FASTGWA", "gcta", - "*******************************************************************" + "1.94.1" ] ] } @@ -97,7 +97,7 @@ [ "GCTA_FASTGWA", "gcta", - "*******************************************************************" + "1.94.1" ] ] } diff --git a/modules/nf-core/gcta/filtergrmwithkeep/main.nf b/modules/nf-core/gcta/filtergrmwithkeep/main.nf index 9701e66bc0e1..f0afdf932e7d 100644 --- a/modules/nf-core/gcta/filtergrmwithkeep/main.nf +++ b/modules/nf-core/gcta/filtergrmwithkeep/main.nf @@ -12,7 +12,7 @@ process GCTA_FILTERGRMWITHKEEP { output: tuple val(meta), path("${meta.id}_unrel.grm.id"), path("${meta.id}_unrel.grm.bin"), path("${meta.id}_unrel.grm.N.bin"), emit: filtered_grm - tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | head -n 1"), emit: versions_gcta, topic: versions + tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/gcta/filtergrmwithkeep/meta.yml b/modules/nf-core/gcta/filtergrmwithkeep/meta.yml index e7dfa3fe898f..88788f8a6de1 100644 --- a/modules/nf-core/gcta/filtergrmwithkeep/meta.yml +++ b/modules/nf-core/gcta/filtergrmwithkeep/meta.yml @@ -74,7 +74,7 @@ output: - "gcta": type: string 
description: The tool name - - "gcta --version 2>&1 | head -n 1": + - "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'": type: eval description: The command used to retrieve the GCTA version @@ -86,7 +86,7 @@ topics: - gcta: type: string description: The tool name - - gcta --version 2>&1 | head -n 1: + - gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//': type: eval description: The command used to retrieve the GCTA version diff --git a/modules/nf-core/gcta/filtergrmwithkeep/tests/main.nf.test.snap b/modules/nf-core/gcta/filtergrmwithkeep/tests/main.nf.test.snap index 85fbfd385934..949d1ab7da03 100644 --- a/modules/nf-core/gcta/filtergrmwithkeep/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/filtergrmwithkeep/tests/main.nf.test.snap @@ -16,7 +16,7 @@ [ "GCTA_FILTERGRMWITHKEEP", "gcta", - "*******************************************************************" + "1.94.1" ] ] } @@ -44,7 +44,7 @@ [ "GCTA_FILTERGRMWITHKEEP", "gcta", - "*******************************************************************" + "1.94.1" ] ], "filtered_grm": [ @@ -61,7 +61,7 @@ [ "GCTA_FILTERGRMWITHKEEP", "gcta", - "*******************************************************************" + "1.94.1" ] ] } diff --git a/modules/nf-core/gcta/makebksparse/main.nf b/modules/nf-core/gcta/makebksparse/main.nf index d2cd63987795..7accab6b7c94 100644 --- a/modules/nf-core/gcta/makebksparse/main.nf +++ b/modules/nf-core/gcta/makebksparse/main.nf @@ -12,7 +12,7 @@ process GCTA_MAKEBKSPARSE { output: tuple val(meta), path("${meta.id}_sp.grm.id"), path("${meta.id}_sp.grm.sp"), emit: sparse_grm_files - tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | head -n 1"), emit: versions_gcta, topic: versions + tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions when: task.ext.when == null || task.ext.when diff --git 
a/modules/nf-core/gcta/makebksparse/meta.yml b/modules/nf-core/gcta/makebksparse/meta.yml index dc17568d739f..c619ab38ede6 100644 --- a/modules/nf-core/gcta/makebksparse/meta.yml +++ b/modules/nf-core/gcta/makebksparse/meta.yml @@ -62,7 +62,7 @@ output: - "gcta": type: string description: The tool name - - "gcta --version 2>&1 | head -n 1": + - "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'": type: eval description: The command used to retrieve the GCTA version @@ -74,7 +74,7 @@ topics: - gcta: type: string description: The tool name - - gcta --version 2>&1 | head -n 1: + - gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//': type: eval description: The command used to retrieve the GCTA version diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap index 8ad77892ba34..42828835a289 100644 --- a/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap @@ -15,7 +15,7 @@ [ "GCTA_MAKEBKSPARSE", "gcta", - "*******************************************************************" + "1.94.1" ] ] } @@ -42,7 +42,7 @@ [ "GCTA_MAKEBKSPARSE", "gcta", - "*******************************************************************" + "1.94.1" ] ], "sparse_grm_files": [ @@ -58,7 +58,7 @@ [ "GCTA_MAKEBKSPARSE", "gcta", - "*******************************************************************" + "1.94.1" ] ] } diff --git a/modules/nf-core/gcta/makegrmpart/main.nf b/modules/nf-core/gcta/makegrmpart/main.nf index b234712c1525..f3398ee515db 100644 --- a/modules/nf-core/gcta/makegrmpart/main.nf +++ b/modules/nf-core/gcta/makegrmpart/main.nf @@ -12,7 +12,7 @@ process GCTA_MAKEGRMPART { output: tuple val(meta), path("${meta.id}.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.id"), path("${meta.id}.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.bin"), 
path("${meta.id}.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.N.bin"), emit: grm_files - tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | head -n 1"), emit: versions_gcta, topic: versions + tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/gcta/makegrmpart/meta.yml b/modules/nf-core/gcta/makegrmpart/meta.yml index fbcf0aef6c4d..c29dfb89a8bc 100644 --- a/modules/nf-core/gcta/makegrmpart/meta.yml +++ b/modules/nf-core/gcta/makegrmpart/meta.yml @@ -80,7 +80,7 @@ output: - "gcta": type: string description: The tool name - - "gcta --version 2>&1 | head -n 1": + - "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'": type: eval description: The command used to generate the version of the tool topics: @@ -91,7 +91,7 @@ topics: - gcta: type: string description: The tool name - - gcta --version 2>&1 | head -n 1: + - gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//': type: eval description: The command used to generate the version of the tool authors: diff --git a/modules/nf-core/gcta/makegrmpart/tests/main.nf.test.snap b/modules/nf-core/gcta/makegrmpart/tests/main.nf.test.snap index 49ba14aa5f41..0ba4cfbf26bc 100644 --- a/modules/nf-core/gcta/makegrmpart/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/makegrmpart/tests/main.nf.test.snap @@ -18,7 +18,7 @@ [ "GCTA_MAKEGRMPART", "gcta", - "*******************************************************************" + "1.94.1" ] ] } @@ -48,7 +48,7 @@ [ "GCTA_MAKEGRMPART", "gcta", - "*******************************************************************" + "1.94.1" ] ], "grm_files": [ @@ -67,7 +67,7 @@ [ "GCTA_MAKEGRMPART", "gcta", - "*******************************************************************" + "1.94.1" ] ] } diff --git 
a/modules/nf-core/gcta/reml/main.nf b/modules/nf-core/gcta/reml/main.nf index 07f26d88f36b..1fd886187b57 100644 --- a/modules/nf-core/gcta/reml/main.nf +++ b/modules/nf-core/gcta/reml/main.nf @@ -15,7 +15,7 @@ process GCTA_REML { output: tuple val(meta), path("${meta.id}.hsq"), emit: reml_results - tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | head -n 1"), emit: versions_gcta, topic: versions + tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/gcta/reml/meta.yml b/modules/nf-core/gcta/reml/meta.yml index 3f3fb01cf4fc..d5e9c3c8e97e 100644 --- a/modules/nf-core/gcta/reml/meta.yml +++ b/modules/nf-core/gcta/reml/meta.yml @@ -85,7 +85,7 @@ output: - "gcta": type: string description: The tool name - - "gcta --version 2>&1 | head -n 1": + - "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'": type: eval description: The command used to retrieve the GCTA version topics: @@ -96,7 +96,7 @@ topics: - gcta: type: string description: The tool name - - gcta --version 2>&1 | head -n 1: + - gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//': type: eval description: The command used to retrieve the GCTA version authors: diff --git a/modules/nf-core/gcta/reml/tests/main.nf.test.snap b/modules/nf-core/gcta/reml/tests/main.nf.test.snap index 7eef71fdd6eb..6e68e584197b 100644 --- a/modules/nf-core/gcta/reml/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/reml/tests/main.nf.test.snap @@ -14,7 +14,7 @@ [ "GCTA_REML", "gcta", - "*******************************************************************" + "1.94.1" ] ], "reml_results": [ @@ -29,7 +29,7 @@ [ "GCTA_REML", "gcta", - "*******************************************************************" + "1.94.1" ] ] } @@ -55,7 +55,7 @@ [ "GCTA_REML", "gcta", - 
"*******************************************************************" + "1.94.1" ] ] } diff --git a/modules/nf-core/gcta/remlldms/main.nf b/modules/nf-core/gcta/remlldms/main.nf index d3436b62f40b..2ef8bde73139 100644 --- a/modules/nf-core/gcta/remlldms/main.nf +++ b/modules/nf-core/gcta/remlldms/main.nf @@ -14,7 +14,7 @@ process GCTA_REMLLDMS { output: tuple val(meta), path("${meta.id}.hsq"), emit: reml_results - tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | head -n 1"), emit: versions_gcta, topic: versions + tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/gcta/remlldms/meta.yml b/modules/nf-core/gcta/remlldms/meta.yml index 438e958632a4..0705bd2438e0 100644 --- a/modules/nf-core/gcta/remlldms/meta.yml +++ b/modules/nf-core/gcta/remlldms/meta.yml @@ -82,7 +82,7 @@ output: - "gcta": type: string description: The tool name - - "gcta --version 2>&1 | head -n 1": + - "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'": type: eval description: The command used to retrieve the GCTA version topics: @@ -93,7 +93,7 @@ topics: - gcta: type: string description: The tool name - - gcta --version 2>&1 | head -n 1: + - gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//': type: eval description: The command used to retrieve the GCTA version authors: diff --git a/modules/nf-core/gcta/remlldms/tests/main.nf.test.snap b/modules/nf-core/gcta/remlldms/tests/main.nf.test.snap index 77349463ed82..240b284b7cf3 100644 --- a/modules/nf-core/gcta/remlldms/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/remlldms/tests/main.nf.test.snap @@ -14,7 +14,7 @@ [ "GCTA_REMLLDMS", "gcta", - "*******************************************************************" + "1.94.1" ] ], "reml_results": [ @@ -29,7 +29,7 @@ [ 
"GCTA_REMLLDMS", "gcta", - "*******************************************************************" + "1.94.1" ] ] } @@ -55,7 +55,7 @@ [ "GCTA_REMLLDMS", "gcta", - "*******************************************************************" + "1.94.1" ] ] } diff --git a/modules/nf-core/gcta/removerelatedsubjects/main.nf b/modules/nf-core/gcta/removerelatedsubjects/main.nf index 2214805c8ce5..3f90d2f388eb 100644 --- a/modules/nf-core/gcta/removerelatedsubjects/main.nf +++ b/modules/nf-core/gcta/removerelatedsubjects/main.nf @@ -12,7 +12,7 @@ process GCTA_REMOVERELATEDSUBJECTS { output: tuple val(meta), path("${meta.id}_unrel05.grm.id"), path("${meta.id}_unrel05.grm.bin"), path("${meta.id}_unrel05.grm.N.bin"), emit: grm_files tuple val(meta), path("${meta.id}_unrel05.grm.id"), emit: keep_file - tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | head -n 1"), emit: versions_gcta, topic: versions + tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/gcta/removerelatedsubjects/meta.yml b/modules/nf-core/gcta/removerelatedsubjects/meta.yml index d7864252d034..e8085c2564ee 100644 --- a/modules/nf-core/gcta/removerelatedsubjects/meta.yml +++ b/modules/nf-core/gcta/removerelatedsubjects/meta.yml @@ -74,7 +74,7 @@ output: - "gcta": type: string description: The tool name - - "gcta --version 2>&1 | head -n 1": + - "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'": type: eval description: The command used to retrieve the GCTA version @@ -86,7 +86,7 @@ topics: - gcta: type: string description: The tool name - - gcta --version 2>&1 | head -n 1: + - gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//': type: eval description: The command used to retrieve the GCTA version diff --git 
a/modules/nf-core/gcta/removerelatedsubjects/tests/main.nf.test.snap b/modules/nf-core/gcta/removerelatedsubjects/tests/main.nf.test.snap index cb84a8ad5065..0751d240524d 100644 --- a/modules/nf-core/gcta/removerelatedsubjects/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/removerelatedsubjects/tests/main.nf.test.snap @@ -24,7 +24,7 @@ [ "GCTA_REMOVERELATEDSUBJECTS", "gcta", - "*******************************************************************" + "1.94.1" ] ] } @@ -60,7 +60,7 @@ [ "GCTA_REMOVERELATEDSUBJECTS", "gcta", - "*******************************************************************" + "1.94.1" ] ], "grm_files": [ @@ -85,7 +85,7 @@ [ "GCTA_REMOVERELATEDSUBJECTS", "gcta", - "*******************************************************************" + "1.94.1" ] ] } From d8b481e6d3f89d9e5e8ac742561234e11b628d43 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Tue, 17 Mar 2026 01:19:55 +0800 Subject: [PATCH 6/9] Add mpheno support to GCTA REML modules --- modules/nf-core/gcta/reml/main.nf | 2 + modules/nf-core/gcta/reml/meta.yml | 2 + modules/nf-core/gcta/reml/tests/main.nf.test | 52 ++++++++++++++++ .../nf-core/gcta/reml/tests/main.nf.test.snap | 31 +++++++++- modules/nf-core/gcta/remlldms/main.nf | 2 + modules/nf-core/gcta/remlldms/meta.yml | 2 + .../nf-core/gcta/remlldms/tests/main.nf.test | 59 +++++++++++++++++++ .../gcta/remlldms/tests/main.nf.test.snap | 29 ++++++++- 8 files changed, 176 insertions(+), 3 deletions(-) diff --git a/modules/nf-core/gcta/reml/main.nf b/modules/nf-core/gcta/reml/main.nf index 1fd886187b57..581d1334913d 100644 --- a/modules/nf-core/gcta/reml/main.nf +++ b/modules/nf-core/gcta/reml/main.nf @@ -21,6 +21,7 @@ process GCTA_REML { task.ext.when == null || task.ext.when script: + def mpheno_param = meta.mpheno ? "--mpheno ${meta.mpheno}" : '' def qcovar_param = quant_covariates_file ? "--qcovar ${quant_covariates_file}" : '' def covar_param = cat_covariates_file ? 
"--covar ${cat_covariates_file}" : '' def extra_args = task.ext.args ?: '' @@ -32,6 +33,7 @@ process GCTA_REML { --reml \\ --grm ${meta2.id} \\ --pheno ${phenotypes_file} \\ + ${mpheno_param} \\ ${qcovar_param} \\ ${covar_param} \\ --out "${meta.id}" \\ diff --git a/modules/nf-core/gcta/reml/meta.yml b/modules/nf-core/gcta/reml/meta.yml index d5e9c3c8e97e..1913d1470210 100644 --- a/modules/nf-core/gcta/reml/meta.yml +++ b/modules/nf-core/gcta/reml/meta.yml @@ -17,6 +17,7 @@ input: description: | Groovy map containing phenotype metadata e.g. `[ id:'QuantitativeTrait' ]` + Optional phenotype selector may be supplied as `meta.mpheno` - phenotypes_file: type: file description: Phenotype file passed to `--pheno` @@ -72,6 +73,7 @@ output: description: | Groovy map containing phenotype metadata e.g. `[ id:'QuantitativeTrait' ]` + Preserves optional `meta.mpheno` when supplied - "${meta.id}.hsq": type: file description: REML result file diff --git a/modules/nf-core/gcta/reml/tests/main.nf.test b/modules/nf-core/gcta/reml/tests/main.nf.test index 274a986f35e8..81b8a7ce0996 100644 --- a/modules/nf-core/gcta/reml/tests/main.nf.test +++ b/modules/nf-core/gcta/reml/tests/main.nf.test @@ -26,6 +26,20 @@ nextflow_process { } } + run("GAWK", alias: "GAWK_MULTI_PHENO") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'QuantitativeTraits' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$3, (\$3 * 1.7) + ((NR % 5) / 10.0) }').collectFile(name:'multi_phenotype.awk') + input[2] = false + """ + } + } + run("GAWK", alias: "GAWK_QUANTITATIVE_COVARIATES") { script "../../../gawk/main.nf" process { @@ -79,6 +93,44 @@ nextflow_process { } } + test("homo_sapiens popgen - dense GRM with mpheno selection") { + config "./nextflow.config" + when { + process { + """ + dense_grm = 
GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + multi_pheno = GAWK_MULTI_PHENO.out.output.map { meta, phenotype_file -> + [[ id:'QuantitativeTraitMpheno2', mpheno:2 ], phenotype_file] + } + + input[0] = multi_pheno + input[1] = dense_grm + input[2] = GAWK_QUANTITATIVE_COVARIATES.out.output + input[3] = GAWK_CATEGORICAL_COVARIATES.out.output + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.reml_results.size() == 1 }, + { assert process.out.reml_results.get(0).get(0).id == "QuantitativeTraitMpheno2" }, + { assert process.out.reml_results.get(0).get(0).mpheno == 2 }, + { + assert snapshot( + process.out.reml_results, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + test("homo_sapiens popgen - quantitative phenotype with dense GRM and covariates") { config "./nextflow.config" when { diff --git a/modules/nf-core/gcta/reml/tests/main.nf.test.snap b/modules/nf-core/gcta/reml/tests/main.nf.test.snap index 6e68e584197b..d71d3298c14f 100644 --- a/modules/nf-core/gcta/reml/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/reml/tests/main.nf.test.snap @@ -64,6 +64,33 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-13T15:36:28.705452813" + "timestamp": "2026-03-16T19:09:30.415321363" + }, + "homo_sapiens popgen - dense GRM with mpheno selection": { + "content": [ + [ + [ + { + "id": "QuantitativeTraitMpheno2", + "mpheno": 2 + }, + "QuantitativeTraitMpheno2.hsq:md5,200834277b8618736f6cd522005d3838" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_REML", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-16T19:10:10.617216844" } -} \ No newline at end of file +} diff --git a/modules/nf-core/gcta/remlldms/main.nf 
b/modules/nf-core/gcta/remlldms/main.nf index 2ef8bde73139..e8a5ea6e4c25 100644 --- a/modules/nf-core/gcta/remlldms/main.nf +++ b/modules/nf-core/gcta/remlldms/main.nf @@ -20,6 +20,7 @@ process GCTA_REMLLDMS { task.ext.when == null || task.ext.when script: + def mpheno_param = meta.mpheno ? "--mpheno ${meta.mpheno}" : '' def qcovar_param = quant_covariates_file ? "--qcovar ${quant_covariates_file}" : '' def covar_param = cat_covariates_file ? "--covar ${cat_covariates_file}" : '' def extra_args = task.ext.args ?: '' @@ -31,6 +32,7 @@ process GCTA_REMLLDMS { --reml-no-constrain \\ --mgrm ${mgrm_file} \\ --pheno ${phenotypes_file} \\ + ${mpheno_param} \\ ${qcovar_param} \\ ${covar_param} \\ --out "${meta.id}" \\ diff --git a/modules/nf-core/gcta/remlldms/meta.yml b/modules/nf-core/gcta/remlldms/meta.yml index 0705bd2438e0..7242477193a7 100644 --- a/modules/nf-core/gcta/remlldms/meta.yml +++ b/modules/nf-core/gcta/remlldms/meta.yml @@ -18,6 +18,7 @@ input: description: | Groovy map containing phenotype metadata e.g. `[ id:'QuantitativeTrait' ]` + Optional phenotype selector may be supplied as `meta.mpheno` - phenotypes_file: type: file description: Phenotype file passed to `--pheno` @@ -69,6 +70,7 @@ output: description: | Groovy map containing phenotype metadata e.g. 
`[ id:'QuantitativeTrait' ]` + Preserves optional `meta.mpheno` when supplied - "${meta.id}.hsq": type: file description: REML-LDMS result file diff --git a/modules/nf-core/gcta/remlldms/tests/main.nf.test b/modules/nf-core/gcta/remlldms/tests/main.nf.test index 7cf0bc13c8af..bd3964215e43 100644 --- a/modules/nf-core/gcta/remlldms/tests/main.nf.test +++ b/modules/nf-core/gcta/remlldms/tests/main.nf.test @@ -26,6 +26,20 @@ nextflow_process { } } + run("GAWK", alias: "GAWK_MULTI_PHENO") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'QuantitativeTraits' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/gsmr/bfile/bfile.fam', checkIfExists: true) + ] + input[1] = Channel.of('{ trait1 = ((NR % 11) - 5) + (((NR * NR) % 7) / 10.0); print \$1, \$2, trait1, trait1 + ((NR % 5) / 10.0) }').collectFile(name:'multi_phenotype.awk') + input[2] = false + """ + } + } + run("GAWK", alias: "GAWK_QUANTITATIVE_COVARIATES") { script "../../../gawk/main.nf" process { @@ -131,6 +145,51 @@ nextflow_process { } } + test("homo_sapiens gsmr - ldms mgrm with mpheno selection") { + config "./nextflow.config" + when { + process { + """ + mgrm_file = Channel + .of('bfile_ldms1.part_1_1\\nbfile_ldms2.part_1_1') + .collectFile(name:'bfile_ldms.mgrm', newLine: true) + + ldms_grm_files = GCTA_MAKEGRMPART_LDMS1.out.grm_files + .mix(GCTA_MAKEGRMPART_LDMS2.out.grm_files) + .map { meta, grm_id, grm_bin, grm_n_bin -> [grm_id, grm_bin, grm_n_bin] } + .collect() + .map { rows -> rows.flatten() } + + multi_pheno = GAWK_MULTI_PHENO.out.output.map { meta, phenotype_file -> + [[ id:'QuantitativeTraitMpheno2', mpheno:2 ], phenotype_file] + } + + input[0] = multi_pheno + input[1] = mgrm_file + .combine(ldms_grm_files) + .map { row -> [[ id:'bfile_ldms' ], row[0], row[1..-1]] } + input[2] = GAWK_QUANTITATIVE_COVARIATES.out.output + input[3] = GAWK_CATEGORICAL_COVARIATES.out.output + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
process.out.reml_results.size() == 1 }, + { assert process.out.reml_results.get(0).get(0).id == "QuantitativeTraitMpheno2" }, + { assert process.out.reml_results.get(0).get(0).mpheno == 2 }, + { + assert snapshot( + process.out.reml_results, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + test("homo_sapiens gsmr - quantitative phenotype with ldms mgrm and covariates") { config "./nextflow.config" when { diff --git a/modules/nf-core/gcta/remlldms/tests/main.nf.test.snap b/modules/nf-core/gcta/remlldms/tests/main.nf.test.snap index 240b284b7cf3..1fd85202b4af 100644 --- a/modules/nf-core/gcta/remlldms/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/remlldms/tests/main.nf.test.snap @@ -40,6 +40,33 @@ }, "timestamp": "2026-03-13T15:57:32.371358163" }, + "homo_sapiens gsmr - ldms mgrm with mpheno selection": { + "content": [ + [ + [ + { + "id": "QuantitativeTraitMpheno2", + "mpheno": 2 + }, + "QuantitativeTraitMpheno2.hsq:md5,2ceb8590010a6e274f8339b3c77e18ef" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_REMLLDMS", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-16T19:11:03.430185704" + }, "homo_sapiens gsmr - quantitative phenotype with ldms mgrm and covariates": { "content": [ [ @@ -64,6 +91,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-13T15:57:18.520897746" + "timestamp": "2026-03-16T19:11:15.129305749" } } From 3e5cfbc8fa4420b27a3366cdf4f97b416934cca6 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Tue, 17 Mar 2026 01:20:02 +0800 Subject: [PATCH 7/9] Add explicit grm-adj input to GCTA ADJUSTGRM --- modules/nf-core/gcta/adjustgrm/main.nf | 4 ++- modules/nf-core/gcta/adjustgrm/meta.yml | 5 +++ .../nf-core/gcta/adjustgrm/tests/main.nf.test | 34 +++++++++++++++++++ .../gcta/adjustgrm/tests/main.nf.test.snap | 30 +++++++++++++++- 4 files changed, 71 insertions(+), 2 deletions(-) diff --git 
a/modules/nf-core/gcta/adjustgrm/main.nf b/modules/nf-core/gcta/adjustgrm/main.nf index 9f61149355f9..137011a9bf9e 100644 --- a/modules/nf-core/gcta/adjustgrm/main.nf +++ b/modules/nf-core/gcta/adjustgrm/main.nf @@ -8,6 +8,7 @@ process GCTA_ADJUSTGRM { input: tuple val(meta), path(grm_id), path(grm_bin), path(grm_n_bin) + val grm_adj output: tuple val(meta), path("${meta.id}_adj.grm.id"), path("${meta.id}_adj.grm.bin"), path("${meta.id}_adj.grm.N.bin"), emit: grm_files @@ -18,11 +19,12 @@ process GCTA_ADJUSTGRM { script: def args = task.ext.args ?: '' + def grm_adj_value = (grm_adj == null || grm_adj == '') ? 0 : grm_adj """ gcta \\ --grm ${meta.id} \\ - --grm-adj 0 \\ + --grm-adj ${grm_adj_value} \\ --make-grm \\ --out ${meta.id}_adj \\ --thread-num ${task.cpus} \\ diff --git a/modules/nf-core/gcta/adjustgrm/meta.yml b/modules/nf-core/gcta/adjustgrm/meta.yml index a8d311018281..0626c8484f6c 100644 --- a/modules/nf-core/gcta/adjustgrm/meta.yml +++ b/modules/nf-core/gcta/adjustgrm/meta.yml @@ -33,6 +33,11 @@ input: description: Dense GRM sample-count matrix file pattern: "*.grm.N.bin" ontologies: [] + - grm_adj: + type: number + description: | + GRM adjustment value passed to `--grm-adj`. + When an empty string is supplied, the module falls back to `0`. 
output: grm_files: diff --git a/modules/nf-core/gcta/adjustgrm/tests/main.nf.test b/modules/nf-core/gcta/adjustgrm/tests/main.nf.test index 76be2d28d935..947453dc0866 100644 --- a/modules/nf-core/gcta/adjustgrm/tests/main.nf.test +++ b/modules/nf-core/gcta/adjustgrm/tests/main.nf.test @@ -48,6 +48,39 @@ nextflow_process { } input[0] = dense_grm + input[1] = 1 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.grm_files.size() == 1 }, + { assert process.out.grm_files.get(0).get(0).id == "plink_simulated_dense.part_1_1" }, + { + assert snapshot( + process.out.grm_files, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - adjust dense GRM with fallback default") { + config "./nextflow.config" + + when { + process { + """ + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = dense_grm + input[1] = '' """ } } @@ -80,6 +113,7 @@ nextflow_process { } input[0] = dense_grm + input[1] = 1 """ } } diff --git a/modules/nf-core/gcta/adjustgrm/tests/main.nf.test.snap b/modules/nf-core/gcta/adjustgrm/tests/main.nf.test.snap index 04fcb7aebad7..47867f8dcb57 100644 --- a/modules/nf-core/gcta/adjustgrm/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/adjustgrm/tests/main.nf.test.snap @@ -45,6 +45,34 @@ "timestamp": "2026-03-13T15:35:38.715590031" }, "homo_sapiens popgen - adjust dense GRM": { + "content": [ + [ + [ + { + "id": "plink_simulated_dense.part_1_1" + }, + "plink_simulated_dense.part_1_1_adj.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9", + "plink_simulated_dense.part_1_1_adj.grm.bin:md5,2daf6b143fde26dfe8e340237443ffaf", + "plink_simulated_dense.part_1_1_adj.grm.N.bin:md5,acaa43bbbf2253d392537a178ecf09a4" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_ADJUSTGRM", + "gcta", + 
"1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-16T23:53:57.519973105" + }, + "homo_sapiens popgen - adjust dense GRM with fallback default": { "content": [ [ [ @@ -70,6 +98,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-13T15:35:32.503491346" + "timestamp": "2026-03-17T00:00:08.732593482" } } \ No newline at end of file From d92f6dccab5cd6d3086a17aaa4791025e7482521 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Tue, 17 Mar 2026 23:15:03 +0800 Subject: [PATCH 8/9] fix: use valid meta schema type for gcta adjustgrm grm_adj --- modules/nf-core/gcta/adjustgrm/meta.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/gcta/adjustgrm/meta.yml b/modules/nf-core/gcta/adjustgrm/meta.yml index 0626c8484f6c..6d093af721e4 100644 --- a/modules/nf-core/gcta/adjustgrm/meta.yml +++ b/modules/nf-core/gcta/adjustgrm/meta.yml @@ -34,7 +34,7 @@ input: pattern: "*.grm.N.bin" ontologies: [] - grm_adj: - type: number + type: integer description: | GRM adjustment value passed to `--grm-adj`. When an empty string is supplied, the module falls back to `0`. 
From e6f5122e40c185e952b3fbc8c75264d63149c7e3 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Wed, 18 Mar 2026 19:58:54 +0800 Subject: [PATCH 9/9] Update GCTA snapshots for refreshed popgen fixtures --- .../nf-core/gcta/bivariatereml/tests/main.nf.test.snap | 4 ++-- modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap | 6 +++--- modules/nf-core/gcta/reml/tests/main.nf.test.snap | 10 +++++----- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/modules/nf-core/gcta/bivariatereml/tests/main.nf.test.snap b/modules/nf-core/gcta/bivariatereml/tests/main.nf.test.snap index ff68ab71f489..d16589847d48 100644 --- a/modules/nf-core/gcta/bivariatereml/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/bivariatereml/tests/main.nf.test.snap @@ -6,7 +6,7 @@ { "id": "Trait1__Trait2" }, - "Trait1__Trait2.hsq:md5,4fe310d5073a497f459e33ee7aa357a4" + "Trait1__Trait2.hsq:md5,dab8c6af3e42c9e359825f8f7e6e6fce" ] ], { @@ -23,7 +23,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-13T15:37:23.03169267" + "timestamp": "2026-03-18T19:00:59.97128964" }, "homo_sapiens popgen - bivariate phenotype with dense GRM - stub": { "content": [ diff --git a/modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap b/modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap index 49e16c76984c..b3bd73e97fb5 100644 --- a/modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap @@ -59,7 +59,7 @@ "id": "QuantitativeTrait", "is_binary": false }, - "plink_simulated_QuantitativeTrait.fastGWA:md5,ba64c9460f412ffa7afb4060eaa029e4" + "plink_simulated_QuantitativeTrait.fastGWA:md5,d9190e07273a3de2a15a6e7053aed487" ] ], { @@ -76,7 +76,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-13T18:11:54.793137401" + "timestamp": "2026-03-18T19:01:13.458535259" }, "homo_sapiens popgen - plink1 with sparse GRM and binary phenotype": { "content": [ @@ -108,4 +108,4 @@ }, "timestamp": "2026-03-13T18:15:22.74128729" } -} +} 
\ No newline at end of file diff --git a/modules/nf-core/gcta/reml/tests/main.nf.test.snap b/modules/nf-core/gcta/reml/tests/main.nf.test.snap index d71d3298c14f..195b2991921f 100644 --- a/modules/nf-core/gcta/reml/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/reml/tests/main.nf.test.snap @@ -47,7 +47,7 @@ { "id": "QuantitativeTrait" }, - "QuantitativeTrait.hsq:md5,43dbe0c6efdafaaf2a19819c9a47e2d5" + "QuantitativeTrait.hsq:md5,a1a3eb919cf7aec392435b4bf36ae788" ] ], { @@ -64,7 +64,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-16T19:09:30.415321363" + "timestamp": "2026-03-18T19:01:39.933270203" }, "homo_sapiens popgen - dense GRM with mpheno selection": { "content": [ @@ -74,7 +74,7 @@ "id": "QuantitativeTraitMpheno2", "mpheno": 2 }, - "QuantitativeTraitMpheno2.hsq:md5,200834277b8618736f6cd522005d3838" + "QuantitativeTraitMpheno2.hsq:md5,47a16182353f1c15a9b1408ee02bdcdc" ] ], { @@ -91,6 +91,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-16T19:10:10.617216844" + "timestamp": "2026-03-18T19:01:34.187379358" } -} +} \ No newline at end of file