kbaseapps · Xiangs18 · Aug 15, 2024 · Aug 16, 2024 · Aug 16, 2024 · Aug 16, 2024
@@ -318,6 +318,7 @@ module GenomeFileUtil {
                 returns (MetagenomeSaveResult returnVal) authentication required;
 
     typedef structure {
+        int workspace_id;
         string workspace;
         string name;
         KBaseGenomes.Genome data;

@@ -69,7 +69,7 @@ class GenomeFileUtil:
     ######################################### noqa
     VERSION = "0.11.7"
     GIT_URL = "git@github.com:kbaseapps/GenomeFileUtil.git"
-    GIT_COMMIT_HASH = "591a19ccf4d1b42f01cc06486654b6d3a8ea08e4"
+    GIT_COMMIT_HASH = "861a50b5ce2d0b8b481fe514b41a6d6b89ee0c96"
 
     #BEGIN_CLASS_HEADER
     #END_CLASS_HEADER
@@ -831,27 +831,27 @@ def fasta_gff_to_metagenome(self, ctx, params):
     def save_one_genome(self, ctx, params):
         """
         :param params: instance of type "SaveOneGenomeParams" -> structure:
-           parameter "workspace" of String, parameter "name" of String,
-           parameter "data" of type "Genome" (Genome type -- annotated and
-           assembled genome data. Field descriptions: id - string - KBase
-           legacy data ID scientific_name - string - human readable species
-           name domain - string - human readable phylogenetic domain name
-           (eg. "Bacteria") warnings - list of string - genome-level warnings
-           generated in the annotation process genome_tiers - list of string
-           - controlled vocabulary (based on app input and checked by
-           GenomeFileUtil) A list of labels describing the data source for
-           this genome. Allowed values - Representative, Reference,
-           ExternalDB, User Tier assignments based on genome source: * All
-           phytozome - Representative and ExternalDB * Phytozome flagship
-           genomes - Reference, Representative and ExternalDB * Ensembl -
-           Representative and ExternalDB * RefSeq Reference - Reference,
-           Representative and ExternalDB * RefSeq Representative -
-           Representative and ExternalDB * RefSeq Latest or All Assemblies
-           folder - ExternalDB * User Data - User tagged feature_counts - map
-           of string to integer - total counts of each type of feature keys
-           are a controlled vocabulary of - "CDS", "gene", "misc_feature",
-           "misc_recomb", "mobile_element", "ncRNA" - 72,
-           "non_coding_features", "non_coding_genes",
+           parameter "workspace_id" of Long, parameter "workspace" of String,
+           parameter "name" of String, parameter "data" of type "Genome"
+           (Genome type -- annotated and assembled genome data. Field
+           descriptions: id - string - KBase legacy data ID scientific_name -
+           string - human readable species name domain - string - human
+           readable phylogenetic domain name (eg. "Bacteria") warnings - list
+           of string - genome-level warnings generated in the annotation
+           process genome_tiers - list of string - controlled vocabulary
+           (based on app input and checked by GenomeFileUtil) A list of
+           labels describing the data source for this genome. Allowed values
+           - Representative, Reference, ExternalDB, User Tier assignments
+           based on genome source: * All phytozome - Representative and
+           ExternalDB * Phytozome flagship genomes - Reference,
+           Representative and ExternalDB * Ensembl - Representative and
+           ExternalDB * RefSeq Reference - Reference, Representative and
+           ExternalDB * RefSeq Representative - Representative and ExternalDB
+           * RefSeq Latest or All Assemblies folder - ExternalDB * User Data
+           - User tagged feature_counts - map of string to integer - total
+           counts of each type of feature keys are a controlled vocabulary of
+           - "CDS", "gene", "misc_feature", "misc_recomb", "mobile_element",
+           "ncRNA" - 72, "non_coding_features", "non_coding_genes",
            "protein_encoding_gene", "rRNA", "rep_origin", "repeat_region",
            "tRNA" genetic_code - int - An NCBI-assigned taxonomic category
            for the organism See here -

@@ -22,7 +22,8 @@
 from installed_clients.WorkspaceClient import Workspace
 from GenomeFileUtil.core.GenomeUtils import (
     is_parent, propagate_cds_props_to_gene, warnings, parse_inferences,
-    load_ontology_mappings, set_taxon_data, set_default_taxon_data
+    load_ontology_mappings, set_taxon_data, set_default_taxon_data,
+    set_up_single_params, validate_mass_params
 )
 
 MAX_MISC_FEATURE_SIZE = 10000
@@ -113,52 +114,16 @@ def __init__(self, config):
 
     def import_genbank(self, params):
         print('validating parameters')
-        mass_params = self._set_up_single_params(params)
+        mass_params = set_up_single_params(
+            params, _WSNAME, self._validate_params, self.dfu.ws_name_to_id
+        )
         return self._import_genbank_mass(mass_params)[0]
 
     def import_genbank_mass(self, params):
         print('validating parameters')
-        self._validate_mass_params(params)
+        validate_mass_params(params, self._validate_params)
         return self._import_genbank_mass(params)
 
-    def _set_up_single_params(self, params):
-        # avoid side effects and keep variables in params unmodified
-        inputs = dict(params)
-        self._validate_params(inputs)
-        ws_id = self._get_int(inputs.pop(_WSID, None), _WSID)
-        ws_name = inputs.pop(_WSNAME, None)
-        if (bool(ws_id) == bool(ws_name)):  # xnor
-            raise ValueError(f"Exactly one of a '{_WSID}' or a '{_WSNAME}' parameter must be provided")
-        if not ws_id:
-            print(f"Translating workspace name {ws_name} to a workspace ID. Prefer submitting "
-                  + "a workspace ID over a mutable workspace name that may cause race conditions")
-            ws_id = self.dfu.ws_name_to_id(ws_name)
-        mass_params = {_WSID: ws_id, _INPUTS: [inputs]}
-        return mass_params
-
-    def _validate_mass_params(self, params):
-        ws_id = self._get_int(params.get(_WSID), _WSID)
-        if not ws_id:
-            raise ValueError(f"{_WSID} is required")
-        inputs = params.get(_INPUTS)
-        if not inputs or type(inputs) is not list:
-            raise ValueError(f"{_INPUTS} field is required and must be a non-empty list")
-        for i, inp in enumerate(inputs, start=1):
-            if type(inp) is not dict:
-                raise ValueError(f"Entry #{i} in {_INPUTS} field is not a mapping as required")
-            try:
-                self._validate_params(inp)
-            except Exception as e:
-                raise ValueError(f"Entry #{i} in {_INPUTS} field has invalid params: {e}") from e
-
-    def _get_int(self, putative_int, name, minimum=1):
-        if putative_int is not None:
-            if type(putative_int) is not int:
-                raise ValueError(f"{name} must be an integer, got: {putative_int}")
-            if putative_int < minimum:
-                raise ValueError(f"{name} must be an integer >= {minimum}")
-        return putative_int
-
     def _import_genbank_mass(self, params):
 
         workspace_id = params[_WSID]
@@ -197,6 +162,12 @@ def _import_genbank_mass(self, params):
             # parse genbank file
             self._parse_genbank(genome_obj)
 
+            # check features
+            self.gi.check_dna_sequence_in_features(genome_obj.genome_data)
+
+            # validate genome
+            genome_obj.genome_data['warnings'] = self.gi.validate_genome(genome_obj.genome_data)
+
             # gather all objects
             genome_objs.append(genome_obj)
 
@@ -209,7 +180,6 @@ def _import_genbank_mass(self, params):
         for genome_obj in genome_objs:
             shutil.rmtree(genome_obj.input_directory)
 
-        # TODO make an internal mass function save_genomes
         results = self._save_genomes(workspace_id, genome_objs)
 
         # return the result
@@ -227,17 +197,18 @@ def _import_genbank_mass(self, params):
         return details
 
     def _save_genomes(self, workspace_id, genome_objs):
-        results = [
-            self.gi.save_one_genome(
-                {
-                    'workspace': workspace_id,
-                    'name': genome_obj.genome_name,
-                    'data': genome_obj.genome_data,
-                    "meta": genome_obj.genome_meta,
-                }
-            ) for genome_obj in genome_objs
-        ]
-
+        results = self.gi.save_genome_mass(
+            {
+                "workspace_id": workspace_id,
+                "inputs": [
+                    {
+                        "name": genome_obj.genome_name,
+                        "data": genome_obj.genome_data,
+                        "meta": genome_obj.genome_meta,
+                    } for genome_obj in genome_objs
+                ],
+            }
+        )
         return results
 
     def _validate_params(self, params):