From d188642749911fad28913f6a72e4bca454ac5151 Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Tue, 21 Oct 2025 14:58:38 -0700 Subject: [PATCH 1/5] Add slot has_newer_version to WorkflowExecution and DataObject --- src/schema/basic_classes.yaml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/schema/basic_classes.yaml b/src/schema/basic_classes.yaml index 1898e29128..a5a6fc4fb9 100644 --- a/src/schema/basic_classes.yaml +++ b/src/schema/basic_classes.yaml @@ -368,6 +368,7 @@ classes: - url - was_generated_by - in_manifest + - has_newer_version slot_usage: name: required: true @@ -387,6 +388,13 @@ classes: required: true data_category: required: true + has_newer_version: + range: DataObject + structured_pattern: + syntax: "{id_nmdc_prefix}:dobj-{id_shoulder}-{id_blade}$" + interpolated: true + description: >- + Links to a newer version of this DataObject, if one exists. DataEmitterProcess: class_uri: nmdc:DataEmitterProcess is_a: PlannedProcess @@ -462,6 +470,7 @@ classes: - version - was_informed_by - processing_institution_workflow_metadata + - has_newer_version rules: - title: qc_status_pass_has_output_required description: >- @@ -501,6 +510,13 @@ classes: syntax: "{id_nmdc_prefix}:(dobj)-{id_shoulder}-{id_blade}$" interpolated: true range: DataObject + has_newer_version: + range: WorkflowExecution + structured_pattern: + syntax: "{id_nmdc_prefix}:(wfmgas|wfmtas|wfmtex|wfmag|wfrqc|wfrbt|wfmb|wfnom|wfmp|wfmtan|wfmgan)-{id_shoulder}-{id_blade}{id_version}$" + interpolated: true + description: >- + Links to a newer version of this WorkflowExecution, if one exists. processing_institution: required: true was_informed_by: @@ -687,6 +703,11 @@ slots: description: >- The type of planned process that the protocol describes. + has_newer_version: + range: NamedThing + description: >- + Links to a newer version of a resource, if one exists. + enums: CreditEnum: permissible_values: From a4485f08684bce8c058631ca0a08143fbae4b19c Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Tue, 21 Oct 2025 15:17:57 -0700 Subject: [PATCH 2/5] Explicitly constrain range to meet exact WorkflowExecution subclass --- src/schema/workflow_execution_activity.yaml | 51 +++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/src/schema/workflow_execution_activity.yaml b/src/schema/workflow_execution_activity.yaml index faec7fe937..8d47aa9651 100644 --- a/src/schema/workflow_execution_activity.yaml +++ b/src/schema/workflow_execution_activity.yaml @@ -61,6 +61,11 @@ classes: syntax: "{id_nmdc_prefix}:(omprc|dgns)-{id_shoulder}-{id_blade}$" interpolated: true range: NucleotideSequencing + has_newer_version: + range: MetagenomeAssembly + structured_pattern: + syntax: "{id_nmdc_prefix}:wfmgas-{id_shoulder}-{id_blade}{id_version}$" + interpolated: true MetatranscriptomeAssembly: class_uri: nmdc:MetatranscriptomeAssembly @@ -105,6 +110,11 @@ classes: syntax: "{id_nmdc_prefix}:(omprc|dgns)-{id_shoulder}-{id_blade}$" interpolated: true range: NucleotideSequencing + has_newer_version: + range: MetatranscriptomeAssembly + structured_pattern: + syntax: "{id_nmdc_prefix}:wfmtas-{id_shoulder}-{id_blade}{id_version}$" + interpolated: true MetatranscriptomeAnnotation: class_uri: nmdc:MetatranscriptomeAnnotation @@ -137,6 +147,11 @@ classes: structured_pattern: syntax: "^gold:Ga[0-9]+$" interpolated: true + has_newer_version: + range: MetatranscriptomeAnnotation + structured_pattern: + syntax: "{id_nmdc_prefix}:wfmtan-{id_shoulder}-{id_blade}{id_version}$" + interpolated: true MetatranscriptomeExpressionAnalysis: is_a: WorkflowExecution @@ -157,6 +172,12 @@ classes: syntax: "{id_nmdc_prefix}:(omprc|dgns)-{id_shoulder}-{id_blade}$" interpolated: true range: NucleotideSequencing + has_newer_version: + range: MetatranscriptomeExpressionAnalysis + structured_pattern: + syntax: "{id_nmdc_prefix}:wfmtex-{id_shoulder}-{id_blade}{id_version}$" + interpolated: true + class_uri: nmdc:MetatranscriptomeExpressionAnalysis @@ -186,6 +207,11 @@ classes: syntax: "{id_nmdc_prefix}:(omprc|dgns)-{id_shoulder}-{id_blade}$" interpolated: true range: NucleotideSequencing + has_newer_version: + range: MagsAnalysis + structured_pattern: + syntax: "{id_nmdc_prefix}:wfmag-{id_shoulder}-{id_blade}{id_version}$" + interpolated: true ReadQcAnalysis: class_uri: nmdc:ReadQcAnalysis @@ -210,6 +236,11 @@ classes: syntax: "{id_nmdc_prefix}:(omprc|dgns)-{id_shoulder}-{id_blade}$" interpolated: true range: NucleotideSequencing + has_newer_version: + range: ReadQcAnalysis + structured_pattern: + syntax: "{id_nmdc_prefix}:wfrqc-{id_shoulder}-{id_blade}{id_version}$" + interpolated: true ReadBasedTaxonomyAnalysis: class_uri: nmdc:ReadBasedTaxonomyAnalysis @@ -227,6 +258,11 @@ classes: syntax: "{id_nmdc_prefix}:(omprc|dgns)-{id_shoulder}-{id_blade}$" interpolated: true range: NucleotideSequencing + has_newer_version: + range: ReadBasedTaxonomyAnalysis + structured_pattern: + syntax: "{id_nmdc_prefix}:wfrbt-{id_shoulder}-{id_blade}{id_version}$" + interpolated: true MetabolomicsAnalysis: class_uri: nmdc:MetabolomicsAnalysis @@ -246,6 +282,11 @@ classes: syntax: "{id_nmdc_prefix}:(omprc|dgms)-{id_shoulder}-{id_blade}$" interpolated: true range: MassSpectrometry + has_newer_version: + range: MetabolomicsAnalysis + structured_pattern: + syntax: "{id_nmdc_prefix}:wfmb-{id_shoulder}-{id_blade}{id_version}$" + interpolated: true MetaproteomicsAnalysis: class_uri: nmdc:MetaproteomicsAnalysis @@ -263,6 +304,11 @@ classes: syntax: "{id_nmdc_prefix}:(omprc|dgms)-{id_shoulder}-{id_blade}$" interpolated: true range: MassSpectrometry + has_newer_version: + range: MetaproteomicsAnalysis + structured_pattern: + syntax: "{id_nmdc_prefix}:wfmp-{id_shoulder}-{id_blade}{id_version}$" + interpolated: true NomAnalysis: class_uri: nmdc:NomAnalysis @@ -280,6 +326,11 @@ classes: syntax: "{id_nmdc_prefix}:(omprc|dgms)-{id_shoulder}-{id_blade}$" interpolated: true range: MassSpectrometry + has_newer_version: + range: NomAnalysis + structured_pattern: + syntax: "{id_nmdc_prefix}:wfnom-{id_shoulder}-{id_blade}{id_version}$" + interpolated: true slots: From d5777f23bab2cfce18e2606a367f0aebde8d5868 Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Tue, 21 Oct 2025 15:40:59 -0700 Subject: [PATCH 3/5] Add valid and invalid examples --- .../MagsAnalysis_invalid-newer-version.yaml | 29 +++++++++++++++++++ src/data/valid/MagsAnalysis.yaml | 3 +- 2 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 src/data/invalid/MagsAnalysis_invalid-newer-version.yaml diff --git a/src/data/invalid/MagsAnalysis_invalid-newer-version.yaml b/src/data/invalid/MagsAnalysis_invalid-newer-version.yaml new file mode 100644 index 0000000000..dbd2dc80f7 --- /dev/null +++ b/src/data/invalid/MagsAnalysis_invalid-newer-version.yaml @@ -0,0 +1,29 @@ +id: nmdc:wfmag-11-547rwq94.1 +ended_at_time: '2021-09-15T10:13:20+00:00' +execution_resource: NERSC-Perlmutter +processing_institution: NMDC +git_url: git_url1 +name: MAG analysis for project ABCD +started_at_time: '2021-08-05T14:48:51+00:00' +type: nmdc:MagsAnalysis +was_informed_by: + - "nmdc:omprc-11-547rwq95" +has_input: + - "nmdc:dobj-11-547rwq96" +has_output: + - "nmdc:dobj-11-547rwq97" + - "nmdc:dobj-11-547rwq98" +mags_list: + - bin_name: bins.10 + bin_quality: LQ + members_id: + - nmdc:wfmgas-13-56028x05.1_7_c1 + - nmdc:wfmgas-13-56028x05.1_9_c1 + type: nmdc:MagBin + eukaryotic_evaluation: + type: nmdc:EukEval + completeness: 14.71 + contamination: 8.82 + ncbi_lineage_tax_ids: "1-131567-2759-2611352-33682-191814-2603949" + ncbi_lineage: "root,cellular organisms,Eukaryota,Discoba,Euglenozoa,Diplonemea,Diplonemidae" +has_newer_version: nmdc:wfmb-11-547rwq94.2 diff --git a/src/data/valid/MagsAnalysis.yaml b/src/data/valid/MagsAnalysis.yaml index bb93e2c614..f000066f29 100644 --- a/src/data/valid/MagsAnalysis.yaml +++ b/src/data/valid/MagsAnalysis.yaml @@ -25,4 +25,5 @@ mags_list: completeness: 14.71 contamination: 8.82 ncbi_lineage_tax_ids: "1-131567-2759-2611352-33682-191814-2603949" - ncbi_lineage: "root,cellular organisms,Eukaryota,Discoba,Euglenozoa,Diplonemea,Diplonemidae" + ncbi_lineage: "root,cellular organisms,Eukaryota,Discoba,Euglenozoa,Diplonemea,Diplonemidae" +has_newer_version: nmdc:wfmag-11-547rwq94.2 From 2023a5ac2465ed6a9cf35836d568e1719319c155 Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Fri, 24 Oct 2025 11:53:32 -0700 Subject: [PATCH 4/5] Narrow has_newer_version to superseded_by and update description --- .../MagsAnalysis_invalid-newer-version.yaml | 2 +- src/data/valid/MagsAnalysis.yaml | 2 +- src/schema/basic_classes.yaml | 27 ++++++------------- src/schema/workflow_execution_activity.yaml | 20 +++++++------- 4 files changed, 20 insertions(+), 31 deletions(-) diff --git a/src/data/invalid/MagsAnalysis_invalid-newer-version.yaml b/src/data/invalid/MagsAnalysis_invalid-newer-version.yaml index dbd2dc80f7..2eb194d808 100644 --- a/src/data/invalid/MagsAnalysis_invalid-newer-version.yaml +++ b/src/data/invalid/MagsAnalysis_invalid-newer-version.yaml @@ -26,4 +26,4 @@ mags_list: contamination: 8.82 ncbi_lineage_tax_ids: "1-131567-2759-2611352-33682-191814-2603949" ncbi_lineage: "root,cellular organisms,Eukaryota,Discoba,Euglenozoa,Diplonemea,Diplonemidae" -has_newer_version: nmdc:wfmb-11-547rwq94.2 +superseded_by: nmdc:wfmb-11-547rwq94.2 diff --git a/src/data/valid/MagsAnalysis.yaml b/src/data/valid/MagsAnalysis.yaml index f000066f29..06b65b9ea4 100644 --- a/src/data/valid/MagsAnalysis.yaml +++ b/src/data/valid/MagsAnalysis.yaml @@ -26,4 +26,4 @@ mags_list: contamination: 8.82 ncbi_lineage_tax_ids: "1-131567-2759-2611352-33682-191814-2603949" ncbi_lineage: "root,cellular organisms,Eukaryota,Discoba,Euglenozoa,Diplonemea,Diplonemidae" -has_newer_version: nmdc:wfmag-11-547rwq94.2 +superseded_by: nmdc:wfmag-11-547rwq94.2 diff --git a/src/schema/basic_classes.yaml b/src/schema/basic_classes.yaml index a5a6fc4fb9..34223f9b89 100644 --- a/src/schema/basic_classes.yaml +++ b/src/schema/basic_classes.yaml @@ -368,7 +368,7 @@ classes: - url - was_generated_by - in_manifest - - has_newer_version + - superseded_by slot_usage: name: required: true @@ -388,13 +388,7 @@ classes: required: true data_category: required: true - has_newer_version: - range: DataObject - structured_pattern: - syntax: "{id_nmdc_prefix}:dobj-{id_shoulder}-{id_blade}$" - interpolated: true - description: >- - Links to a newer version of this DataObject, if one exists. + DataEmitterProcess: class_uri: nmdc:DataEmitterProcess is_a: PlannedProcess @@ -470,7 +464,7 @@ classes: - version - was_informed_by - processing_institution_workflow_metadata - - has_newer_version + - superseded_by rules: - title: qc_status_pass_has_output_required description: >- @@ -510,13 +504,6 @@ classes: syntax: "{id_nmdc_prefix}:(dobj)-{id_shoulder}-{id_blade}$" interpolated: true range: DataObject - has_newer_version: - range: WorkflowExecution - structured_pattern: - syntax: "{id_nmdc_prefix}:(wfmgas|wfmtas|wfmtex|wfmag|wfrqc|wfrbt|wfmb|wfnom|wfmp|wfmtan|wfmgan)-{id_shoulder}-{id_blade}{id_version}$" - interpolated: true - description: >- - Links to a newer version of this WorkflowExecution, if one exists. processing_institution: required: true was_informed_by: @@ -703,10 +690,12 @@ slots: description: >- The type of planned process that the protocol describes. - has_newer_version: - range: NamedThing + superseded_by: + range: WorkflowExecution description: >- - Links to a newer version of a resource, if one exists. + Links a data object or workflow execution to a newer workflow execution that + supersedes it, marking this record as outdated. The linked workflow execution + produced improved or corrected results that should be used instead of this record. enums: CreditEnum: diff --git a/src/schema/workflow_execution_activity.yaml b/src/schema/workflow_execution_activity.yaml index 8d47aa9651..8c017cb671 100644 --- a/src/schema/workflow_execution_activity.yaml +++ b/src/schema/workflow_execution_activity.yaml @@ -61,7 +61,7 @@ classes: syntax: "{id_nmdc_prefix}:(omprc|dgns)-{id_shoulder}-{id_blade}$" interpolated: true range: NucleotideSequencing - has_newer_version: + superseded_by: range: MetagenomeAssembly structured_pattern: syntax: "{id_nmdc_prefix}:wfmgas-{id_shoulder}-{id_blade}{id_version}$" @@ -110,7 +110,7 @@ classes: syntax: "{id_nmdc_prefix}:(omprc|dgns)-{id_shoulder}-{id_blade}$" interpolated: true range: NucleotideSequencing - has_newer_version: + superseded_by: range: MetatranscriptomeAssembly structured_pattern: syntax: "{id_nmdc_prefix}:wfmtas-{id_shoulder}-{id_blade}{id_version}$" @@ -147,7 +147,7 @@ classes: structured_pattern: syntax: "^gold:Ga[0-9]+$" interpolated: true - has_newer_version: + superseded_by: range: MetatranscriptomeAnnotation structured_pattern: syntax: "{id_nmdc_prefix}:wfmtan-{id_shoulder}-{id_blade}{id_version}$" @@ -172,7 +172,7 @@ classes: syntax: "{id_nmdc_prefix}:(omprc|dgns)-{id_shoulder}-{id_blade}$" interpolated: true range: NucleotideSequencing - has_newer_version: + superseded_by: range: MetatranscriptomeExpressionAnalysis structured_pattern: syntax: "{id_nmdc_prefix}:wfmtex-{id_shoulder}-{id_blade}{id_version}$" @@ -207,7 +207,7 @@ classes: syntax: "{id_nmdc_prefix}:(omprc|dgns)-{id_shoulder}-{id_blade}$" interpolated: true range: NucleotideSequencing - has_newer_version: + superseded_by: range: MagsAnalysis structured_pattern: syntax: "{id_nmdc_prefix}:wfmag-{id_shoulder}-{id_blade}{id_version}$" @@ -236,7 +236,7 @@ classes: syntax: "{id_nmdc_prefix}:(omprc|dgns)-{id_shoulder}-{id_blade}$" interpolated: true range: NucleotideSequencing - has_newer_version: + superseded_by: range: ReadQcAnalysis structured_pattern: syntax: "{id_nmdc_prefix}:wfrqc-{id_shoulder}-{id_blade}{id_version}$" @@ -258,7 +258,7 @@ classes: syntax: "{id_nmdc_prefix}:(omprc|dgns)-{id_shoulder}-{id_blade}$" interpolated: true range: NucleotideSequencing - has_newer_version: + superseded_by: range: ReadBasedTaxonomyAnalysis structured_pattern: syntax: "{id_nmdc_prefix}:wfrbt-{id_shoulder}-{id_blade}{id_version}$" @@ -282,7 +282,7 @@ classes: syntax: "{id_nmdc_prefix}:(omprc|dgms)-{id_shoulder}-{id_blade}$" interpolated: true range: MassSpectrometry - has_newer_version: + superseded_by: range: MetabolomicsAnalysis structured_pattern: syntax: "{id_nmdc_prefix}:wfmb-{id_shoulder}-{id_blade}{id_version}$" @@ -304,7 +304,7 @@ classes: syntax: "{id_nmdc_prefix}:(omprc|dgms)-{id_shoulder}-{id_blade}$" interpolated: true range: MassSpectrometry - has_newer_version: + superseded_by: range: MetaproteomicsAnalysis structured_pattern: syntax: "{id_nmdc_prefix}:wfmp-{id_shoulder}-{id_blade}{id_version}$" @@ -326,7 +326,7 @@ classes: syntax: "{id_nmdc_prefix}:(omprc|dgms)-{id_shoulder}-{id_blade}$" interpolated: true range: MassSpectrometry - has_newer_version: + superseded_by: range: NomAnalysis structured_pattern: syntax: "{id_nmdc_prefix}:wfnom-{id_shoulder}-{id_blade}{id_version}$" From 98b02dc7f001c9a83d9b075a59492504a684a1b8 Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Fri, 24 Oct 2025 11:57:29 -0700 Subject: [PATCH 5/5] Update description for superseded_by --- src/schema/basic_classes.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/schema/basic_classes.yaml b/src/schema/basic_classes.yaml index 34223f9b89..49830eaffb 100644 --- a/src/schema/basic_classes.yaml +++ b/src/schema/basic_classes.yaml @@ -693,9 +693,9 @@ slots: superseded_by: range: WorkflowExecution description: >- - Links a data object or workflow execution to a newer workflow execution that - supersedes it, marking this record as outdated. The linked workflow execution - produced improved or corrected results that should be used instead of this record. + Links a DataObject or WorkflowExecution record to a newer WorkflowExecution that + supersedes it, marking this record as outdated. The linked WorkflowExecution or + resultant DataObjects should be used in favor of this record. enums: CreditEnum: