zenml-io
diff --git a/‎requirements.txt‎
Lines changed: 11 additions & 8 deletions b/‎requirements.txt‎
Lines changed: 11 additions & 8 deletions
diff --git a/‎zenml/backends/orchestrator/base/zenml_local_orchestrator.py‎
Lines changed: 67 additions & 24 deletions b/‎zenml/backends/orchestrator/base/zenml_local_orchestrator.py‎
Lines changed: 67 additions & 24 deletions
diff --git a/‎zenml/backends/processing/processing_spark_backend.py‎
Lines changed: 39 additions & 3 deletions b/‎zenml/backends/processing/processing_spark_backend.py‎
Lines changed: 39 additions & 3 deletions
diff --git a/‎zenml/components/__init__.py‎
Lines changed: 0 additions & 1 deletion b/‎zenml/components/__init__.py‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎zenml/components/bulk_inferrer/component.py‎
Lines changed: 1 addition & 1 deletion b/‎zenml/components/bulk_inferrer/component.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎zenml/components/bulk_inferrer/executor.py‎
Lines changed: 11 additions & 11 deletions b/‎zenml/components/bulk_inferrer/executor.py‎
Lines changed: 11 additions & 11 deletions
diff --git a/‎zenml/components/data_gen/component.py‎
Lines changed: 2 additions & 4 deletions b/‎zenml/components/data_gen/component.py‎
Lines changed: 2 additions & 4 deletions
diff --git a/‎zenml/components/data_gen/executor.py‎
Lines changed: 2 additions & 2 deletions b/‎zenml/components/data_gen/executor.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎zenml/components/evaluator/component.py‎
Lines changed: 2 additions & 3 deletions b/‎zenml/components/evaluator/component.py‎
Lines changed: 2 additions & 3 deletions
diff --git a/‎zenml/components/evaluator/executor.py‎
Lines changed: 4 additions & 3 deletions b/‎zenml/components/evaluator/executor.py‎
Lines changed: 4 additions & 3 deletions
@@ -1,21 +1,24 @@
 absl-py==0.10.0
 pip-check-reqs>=2.0.1,<3
 click>=7.0,<8
-setuptools>=38.4.0
+setuptools==46.4.0
 nbformat>=5.0.4
 panel==0.8.3
 plotly==4.0.0
 tabulate==0.8.7
-numpy==1.18.0
+numpy==1.19.2
 httplib2==0.17.0
-tfx==0.26.1
+six==1.15.0
+tfx==0.30.0
+tensorflow_datasets==4.3.0
 fire==0.3.1
 gitpython==3.1.11
 analytics-python==1.2.9
 distro==1.5.0
-tensorflow>=2.3.0,<2.4.0
-tensorflow-serving-api==2.3.0
-
+tensorflow==2.4.1
+grpcio==1.32.0
+dill==0.3.1.1
+google-cloud-bigquery==1.28.0
 
 # docs
 jupyter-book==0.9.1
@@ -32,8 +35,8 @@ sphinxext-opengraph==0.3.1
 cortex==0.29.0
 
 # gcp
-apache-beam[gcp]==2.27.0
-apache-beam==2.27.0
+apache-beam[gcp]==2.28.0
+apache-beam==2.28.0
 google-apitools==0.5.31
 
 # pytorch
 
@@ -12,11 +12,20 @@
 #  or implied. See the License for the specific language governing
 #  permissions and limitations under the License.
 
-from tfx.orchestration import data_types
+import os
+from typing import Union
+
+from absl import logging
+from tfx.dsl.compiler import compiler
+from tfx.dsl.compiler import constants
 from tfx.orchestration import metadata
-from tfx.orchestration import pipeline
-from tfx.orchestration.config import config_utils
+from tfx.orchestration import pipeline as pipeline_py
+from tfx.orchestration.local import runner_utils
 from tfx.orchestration.local.local_dag_runner import LocalDagRunner
+from tfx.orchestration.portable import launcher
+from tfx.orchestration.portable import runtime_parameter_utils
+from tfx.proto.orchestration import pipeline_pb2
+from tfx.utils import telemetry_utils
 
 from zenml.logger import get_logger
 
@@ -32,24 +41,58 @@ class ZenMLLocalDagRunner(LocalDagRunner):
     https://github.com/tensorflow/tfx/blob/master/tfx/orchestration/local/
     """
 
-    def run(self, tfx_pipeline: pipeline.Pipeline) -> None:
-        for component in tfx_pipeline.components:
-            (component_launcher_class, component_config) = (
-                config_utils.find_component_launch_info(self._config,
-                                                        component))
-            driver_args = data_types.DriverArgs(
-                enable_cache=tfx_pipeline.enable_cache)
-            metadata_connection = metadata.Metadata(
-                tfx_pipeline.metadata_connection_config)
-            component_launcher = component_launcher_class.create(
-                component=component,
-                pipeline_info=tfx_pipeline.pipeline_info,
-                driver_args=driver_args,
-                metadata_connection=metadata_connection,
-                beam_pipeline_args=tfx_pipeline.beam_pipeline_args,
-                additional_pipeline_args=tfx_pipeline
-                    .additional_pipeline_args,
-                component_config=component_config)
-            logger.info('Component %s is running.', component.id)
-            component_launcher.launch()
-            logger.info('Component %s is finished.', component.id)
+    def run(self, pipeline: Union[pipeline_pb2.Pipeline,
+                                  pipeline_py.Pipeline]) -> None:
+        """Runs the given logical pipeline locally, one node at a time.
+
+        A `pipeline_py.Pipeline` is first compiled with the TFX DSL compiler;
+        the run-id runtime parameter is then substituted with a concrete
+        value, the local deployment config is extracted, and a portable
+        `launcher.Launcher` is created and launched for every node.
+
+        Nothing is executed when the `TFX_JSON_EXPORT_PIPELINE_ARGS_PATH`
+        environment variable is set.
+
+        Args:
+          pipeline: Logical pipeline containing pipeline args and components.
+        """
+        # For the CLI: while creating or updating a pipeline, the pipeline
+        # args are only extracted, so the pipeline itself must not execute.
+        if 'TFX_JSON_EXPORT_PIPELINE_ARGS_PATH' in os.environ:
+            return
+        # NOTE(review): run_id is read before the isinstance check below, so
+        # this relies on the incoming object exposing `pipeline_info.run_id`
+        # -- confirm that this also holds for `pipeline_pb2.Pipeline` inputs.
+        run_id = pipeline.pipeline_info.run_id
+
+        # Only the Python DSL object needs compiling; any other input is used
+        # as-is from here on.
+        if isinstance(pipeline, pipeline_py.Pipeline):
+            c = compiler.Compiler()
+            pipeline = c.compile(pipeline)
+
+        # Substitute the run-id runtime parameter with the concrete run_id
+        # captured above.
+        runtime_parameter_utils.substitute_runtime_parameter(
+            pipeline, {
+                constants.PIPELINE_RUN_ID_PARAMETER_NAME: run_id
+            })
+
+        deployment_config = runner_utils.extract_local_deployment_config(
+            pipeline)
+        connection_config = deployment_config.metadata_connection_config
+
+        logging.info('Running pipeline:\n %s', pipeline)
+        logging.info('Using deployment config:\n %s', deployment_config)
+        logging.info('Using connection config:\n %s', connection_config)
+
+        with telemetry_utils.scoped_labels(
+                {telemetry_utils.LABEL_TFX_RUNNER: 'local'}):
+            # Run each node sequentially, in the order given by
+            # pipeline.nodes (expected to be topological -- TODO confirm).
+            # TODO(b/171319478): After IR-based execution is used, use
+            #   multi-threaded execution so that independent components can
+            #   be run in parallel.
+            for node in pipeline.nodes:
+                pipeline_node = node.pipeline_node
+                node_id = pipeline_node.node_info.id
+                # Per-node executor and custom driver specs are looked up in
+                # the deployment config by node id.
+                executor_spec = runner_utils.extract_executor_spec(
+                    deployment_config, node_id)
+                custom_driver_spec = runner_utils.extract_custom_driver_spec(
+                    deployment_config, node_id)
+
+                # A new metadata.Metadata handle is constructed for every
+                # node from the shared connection config.
+                component_launcher = launcher.Launcher(
+                    pipeline_node=pipeline_node,
+                    mlmd_connection=metadata.Metadata(connection_config),
+                    pipeline_info=pipeline.pipeline_info,
+                    pipeline_runtime_spec=pipeline.runtime_spec,
+                    executor_spec=executor_spec,
+                    custom_driver_spec=custom_driver_spec)
+                logging.info('Component %s is running.', node_id)
+                component_launcher.launch()
+                logging.info('Component %s is finished.', node_id)
@@ -13,6 +13,9 @@
 #  permissions and limitations under the License.
 """Definition of the Spark Processing Backend"""
 
+import multiprocessing
+from typing import Text, Optional, List
+
 from zenml.backends.processing import ProcessingBaseBackend
 
 
@@ -30,6 +33,39 @@ class ProcessingSparkBackend(ProcessingBaseBackend):
     This backend is not implemented yet.
     """
 
-    def __init__(self, **kwargs):
-        super().__init__(**kwargs)
-        raise NotImplementedError('Its coming soon!')
+    def __init__(self,
+                 spark_rest_url: Text,
+                 environment_type: Text = 'LOOPBACK',
+                 environment_cache_millis: int = 1000000,
+                 spark_submit_uber_jar: bool = True):
+        """Stores the Spark settings and forwards them to the base backend.
+
+        Args:
+          spark_rest_url: Value later emitted as `--spark_rest_url` by
+            `get_beam_args` (presumably the Spark REST endpoint -- confirm).
+          environment_type: Value later emitted as `--environment_type`.
+          environment_cache_millis: Value later emitted as
+            `--environment_cache_millis`.
+          spark_submit_uber_jar: Stored on the instance and forwarded to the
+            base class constructor.
+        """
+
+        self.spark_rest_url = spark_rest_url
+        self.environment_type = environment_type
+        self.environment_cache_millis = environment_cache_millis
+        self.spark_submit_uber_jar = spark_submit_uber_jar
+
+        # Worker parallelism follows the local CPU count; fall back to a
+        # single worker when the platform cannot report it.
+        try:
+            parallelism = multiprocessing.cpu_count()
+        except NotImplementedError:
+            parallelism = 1
+        self.sdk_worker_parallelism = parallelism
+
+        # The same four settings are handed to the base class as keyword
+        # arguments; sdk_worker_parallelism is kept on this instance only.
+        super(ProcessingSparkBackend, self).__init__(
+            environment_type=environment_type,
+            environment_cache_millis=environment_cache_millis,
+            spark_submit_uber_jar=spark_submit_uber_jar,
+            spark_rest_url=self.spark_rest_url)
+
+    def get_beam_args(self,
+                      pipeline_name: Optional[Text] = None,
+                      pipeline_root: Optional[Text] = None
+                      ) -> Optional[List[Text]]:
+        """Builds the Beam pipeline arguments for the Spark runner.
+
+        Args:
+          pipeline_name: Accepted for interface compatibility; not used here.
+          pipeline_root: Accepted for interface compatibility; not used here.
+
+        Returns:
+          A list of command-line style Beam options built from the settings
+          stored on this instance. `--spark_submit_uber_jar` is included
+          only when `self.spark_submit_uber_jar` is truthy.
+        """
+        beam_args = [
+            '--runner=SparkRunner',
+            '--spark_rest_url=' + self.spark_rest_url,
+            '--environment_type=' + self.environment_type,
+            '--environment_cache_millis=' + str(self.environment_cache_millis),
+            '--sdk_worker_parallelism=' + str(self.sdk_worker_parallelism),
+            '--experiments=use_loopback_process_worker=True',
+            '--experiments=pre_optimize=all']
+
+        # The flag used to be emitted unconditionally, which silently ignored
+        # spark_submit_uber_jar=False passed to the constructor.
+        if self.spark_submit_uber_jar:
+            beam_args.append('--spark_submit_uber_jar')
+        return beam_args
@@ -19,4 +19,3 @@
 from zenml.components.split_gen.component import SplitGen
 from zenml.components.tokenizer.component import Tokenizer
 from zenml.components.trainer.component import Trainer
-from zenml.components.transform_simple.component import SimpleTransform
@@ -29,7 +29,7 @@
 class BulkInferrerSpec(ComponentSpec):
     PARAMETERS = {
         StepKeys.SOURCE: ExecutionParameter(type=Text),
-        StepKeys.ARGS: ExecutionParameter(type=Dict[Text, Any]),
+        StepKeys.ARGS: ExecutionParameter(type=Text),
     }
     INPUTS = {
         MODEL: ChannelParameter(type=standard_artifacts.Model, optional=True),
 
@@ -12,24 +12,24 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ZenML bulk inferrer executor."""
-
+import json
 from typing import Any, Dict, List, Text
 from typing import Optional
 
 import apache_beam as beam
 from absl import logging
 from tfx import types
+from tfx.components.bulk_inferrer.executor import _RunInference
 from tfx.components.util import model_utils
 from tfx.dsl.components.base import base_executor
 from tfx.proto import bulk_inferrer_pb2
 from tfx.types import artifact_utils
 from tfx.utils import path_utils
 from tfx_bsl.public.proto import model_spec_pb2
-from tfx.components.bulk_inferrer.executor import _RunInference
 
-from zenml.components.bulk_inferrer.utils import convert_to_dict
 from zenml.components.bulk_inferrer.constants import MODEL, EXAMPLES, \
     MODEL_BLESSING, PREDICTIONS
+from zenml.components.bulk_inferrer.utils import convert_to_dict
 from zenml.standards.standard_keys import StepKeys
 from zenml.steps.inferrer import BaseInferrer
 from zenml.utils import source_utils
@@ -58,7 +58,7 @@ def Do(self, input_dict: Dict[Text, List[types.Artifact]],
         self._log_startup(input_dict, output_dict, exec_properties)
 
         source = exec_properties[StepKeys.SOURCE]
-        args = exec_properties[StepKeys.ARGS]
+        args = json.loads(exec_properties[StepKeys.ARGS])
         c = source_utils.load_source_path_class(source)
         inferrer_step: BaseInferrer = c(**args)
 
@@ -148,12 +148,12 @@ def _run_model_inference(
                 logging.info('Path of output examples split `%s` is %s.',
                              split, output_examples_split_uri)
                 _ = (
-                    pipeline
-                    | 'RunInference[{}]'.format(split) >>
-                    _RunInference(example_uri, inference_endpoint)
-                    | 'ConvertToDict[{}]'.format(split) >>
-                    beam.Map(convert_to_dict, output_example_spec)
-                    | 'WriteOutput[{}]'.format(split) >>
-                    inferrer_step.write_inference_results())
+                        pipeline
+                        | 'RunInference[{}]'.format(split) >>
+                        _RunInference(example_uri, inference_endpoint)
+                        | 'ConvertToDict[{}]'.format(split) >>
+                        beam.Map(convert_to_dict, output_example_spec)
+                        | 'WriteOutput[{}]'.format(split) >>
+                        inferrer_step.write_inference_results())
 
             logging.info('Output examples written to %s.', output_examples.uri)
@@ -15,7 +15,7 @@ class DataGenSpec(ComponentSpec):
     PARAMETERS = {
         StepKeys.NAME: ExecutionParameter(type=Text),
         StepKeys.SOURCE: ExecutionParameter(type=Text),
-        StepKeys.ARGS: ExecutionParameter(type=Dict[Text, Any]),
+        StepKeys.ARGS: ExecutionParameter(type=Text),
     }
     INPUTS = {}
     OUTPUTS = {
@@ -31,7 +31,6 @@ def __init__(self,
                  name: Text,
                  source: Text,
                  source_args: Dict[Text, Any],
-                 instance_name: Optional[Text] = None,
                  examples: Optional[ChannelParameter] = None):
         """
         Interface for all DataGen components, the main component responsible
@@ -53,5 +52,4 @@ def __init__(self,
                                args=source_args,
                                examples=examples)
 
-        super(DataGen, self).__init__(spec=spec,
-                                      instance_name=instance_name)
+        super(DataGen, self).__init__(spec=spec)
@@ -11,7 +11,7 @@
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 #  or implied. See the License for the specific language governing
 #  permissions and limitations under the License.
-
+import json
 from typing import Dict, Text, Any, List
 
 from tfx import types
@@ -37,7 +37,7 @@ def Do(self,
             exec_properties:
         """
         source = exec_properties[StepKeys.SOURCE]
-        args = exec_properties[StepKeys.ARGS]
+        args = json.loads(exec_properties[StepKeys.ARGS])
         name = exec_properties[StepKeys.NAME]
 
         c = source_utils.load_source_path_class(source)
 
@@ -16,7 +16,7 @@
 
 class ZenMLEvaluatorSpec(ComponentSpec):
     PARAMETERS = {constants.SOURCE: ExecutionParameter(type=Text),
-                  constants.ARGS: ExecutionParameter(type=Dict[Text, Any])}
+                  constants.ARGS: ExecutionParameter(Text)}
 
     INPUTS = {constants.EXAMPLES: ChannelParameter(type=Examples),
               constants.MODEL: ChannelParameter(type=Model, optional=True),
@@ -42,7 +42,6 @@ def __init__(
             examples: types.Channel = None,
             model: types.Channel = None,
             output: Optional[types.Channel] = None,
-            instance_name: Optional[Text] = None,
             schema: Optional[types.Channel] = None):
 
         # Create the output artifact if not provided
@@ -55,4 +54,4 @@ def __init__(
                                   model=model,
                                   schema=schema,
                                   evaluation=evaluation)
-        super(Evaluator, self).__init__(spec=spec, instance_name=instance_name)
+        super(Evaluator, self).__init__(spec=spec)
@@ -1,3 +1,4 @@
+import json
 from typing import Any, Dict, List, Text, Callable, Optional
 
 import apache_beam as beam
@@ -49,7 +50,7 @@ def Do(self,
 
         # Create the step with the schema attached if provided
         source = exec_properties[StepKeys.SOURCE]
-        args = exec_properties[StepKeys.ARGS]
+        args = json.loads(exec_properties[StepKeys.ARGS])
         c = source_utils.load_source_path_class(source)
         evaluator_step: BaseEvaluatorStep = c(**args)
 
@@ -160,7 +161,7 @@ def Do(self,
                                 )
                         examples_list.append(data)
                 # Resolve custom extractors
-                custom_extractors = try_get_fn(evaluator_step.CUSTOM_MODULE,
+                custom_extractors = try_get_fn(evaluator_step.CUSTOM_MODULE or '',
                                                'custom_extractors')
                 extractors = None
                 if custom_extractors:
@@ -170,7 +171,7 @@ def Do(self,
                         tensor_adapter_config=tensor_adapter_config)
 
                 # Resolve custom evaluators
-                custom_evaluators = try_get_fn(evaluator_step.CUSTOM_MODULE,
+                custom_evaluators = try_get_fn(evaluator_step.CUSTOM_MODULE or '',
                                                'custom_evaluators')
                 evaluators = None
                 if custom_evaluators:
Original file line number	Diff line number	Diff line change
`@@ -29,7 +29,7 @@`
`29`	`29`	`class BulkInferrerSpec(ComponentSpec):`
`30`	`30`	`PARAMETERS = {`
`31`	`31`	`StepKeys.SOURCE: ExecutionParameter(type=Text),`
`32`		`- StepKeys.ARGS: ExecutionParameter(type=Dict[Text, Any]),`
	`32`	`+ StepKeys.ARGS: ExecutionParameter(type=Text),`
`33`	`33`	`}`
`34`	`34`	`INPUTS = {`
`35`	`35`	`MODEL: ChannelParameter(type=standard_artifacts.Model, optional=True),`