Skip to content

Commit 8fef233

Browse files
authored
Use input size limits for constant folding (#1903)
Add input size limits for constant folding. This helps avoid excessive time spent in the optimizer in some edge cases. (The edge cases — non-trivial ops applied to large tensors — are not relevant for the exporter itself. They may be of potential interest for optimization in other settings, but that can be done by the user taking explicit steps.) Still to be done: decide how these values are specified from the benchmarking code. For now the default values will be quite useful, but experimenting with these values from the benchmarking code will need a way to control these option values.
1 parent ed28222 commit 8fef233

File tree

2 files changed

+58
-8
lines changed

2 files changed

+58
-8
lines changed

onnxscript/optimizer/__init__.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,17 +111,46 @@ def optimize(
111111
return model
112112

113113

114+
_DEFAULT_CONSTANT_FOLD_INPUT_SIZE_LIMIT = (
115+
_constant_folding._DEFAULT_CONSTANT_FOLD_INPUT_SIZE_LIMIT
116+
)
117+
118+
_DEFAULT_CONSTANT_FOLD_OUTPUT_SIZE_LIMIT = (
119+
_constant_folding._DEFAULT_CONSTANT_FOLD_OUTPUT_SIZE_LIMIT
120+
)
121+
122+
114123
def optimize_ir(
115124
model: ir.Model,
116125
num_iterations: int = 2,
117126
*,
118127
onnx_shape_inference: bool = True,
119128
stop_if_no_change: bool = True,
129+
input_size_limit: int = _DEFAULT_CONSTANT_FOLD_INPUT_SIZE_LIMIT,
130+
output_size_limit: int = _DEFAULT_CONSTANT_FOLD_OUTPUT_SIZE_LIMIT,
120131
) -> None:
132+
"""Optimizes a model.
133+
134+
Args:
135+
model: The model to be optimized.
136+
num_iterations: Number of times the optimization loop is repeated.
137+
onnx_shape_inference: Applies node-level shape-inference as part of optimization
138+
input_size_limit: Will not apply constant folding to ops with any input of size
139+
greater than this. Does not apply to special ops like Shape() and Size().
140+
output_size_limit: Will not rewrite any foldable-op into a Constant op if the size
141+
of the output tensor is greater than this.
142+
stop_if_no_change: Not supported currently (has no effect). Meant to stop the
143+
outer optimization loop if no change is detected in one iteration.
144+
"""
121145
del stop_if_no_change # Looks like rewriter doesn't support this yet.
122146
_inliner.inline(model)
123147
for _ in range(num_iterations):
124-
_constant_folding.fold_constants(model, onnx_shape_inference=onnx_shape_inference)
148+
_constant_folding.fold_constants(
149+
model,
150+
onnx_shape_inference=onnx_shape_inference,
151+
input_size_limit=input_size_limit,
152+
output_size_limit=output_size_limit,
153+
)
125154
rewriter.rewrite(model, pattern_rewrite_rules=_DEFAULT_REWRITE_RULES)
126155
remove_unused_nodes(model)
127156

onnxscript/optimizer/_constant_folding.py

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,9 @@ def is_constant_op(node: ir.Node) -> bool:
4343
)
4444

4545

46-
_DEFAULT_CONSTANT_FOLD_SIZE_LIMIT = constant_folding._DEFAULT_CONSTANT_FOLD_SIZE_LIMIT
46+
_DEFAULT_CONSTANT_FOLD_INPUT_SIZE_LIMIT = 1024
47+
48+
_DEFAULT_CONSTANT_FOLD_OUTPUT_SIZE_LIMIT = constant_folding._DEFAULT_CONSTANT_FOLD_SIZE_LIMIT
4749

4850
logger = logging.getLogger(__name__)
4951

@@ -550,11 +552,16 @@ class ConstantFolder:
550552

551553
def __init__(
552554
self,
555+
*,
553556
external_data_folder: str,
554-
do_shape_inference: bool,
557+
shape_inference: bool,
558+
input_size_limit: int,
559+
output_size_limit: int,
555560
) -> None:
556561
self._external_data_folder = external_data_folder
557-
self._do_shape_inference = do_shape_inference
562+
self._shape_inference = shape_inference
563+
self._input_size_limit = input_size_limit
564+
self._output_size_limit = output_size_limit
558565
self._init()
559566

560567
def _init(self) -> None:
@@ -632,7 +639,7 @@ def new_constant(self, irvalue: ir.Value, value):
632639

633640
irvalue.const_value = _convenience.tensor(value)
634641

635-
if value.nbytes > _DEFAULT_CONSTANT_FOLD_SIZE_LIMIT:
642+
if value.nbytes > self._output_size_limit:
636643
logger.info(
637644
"Skip storing constant folded nvalue %s due to large size %s.",
638645
irvalue.name,
@@ -667,7 +674,7 @@ def process_node(self, node: ir.Node):
667674
# TODO(rama): consider merging type/other info from both values
668675

669676
# Do incremental shape inference
670-
if self._do_shape_inference and not is_control_flow_op(node):
677+
if self._shape_inference and not is_control_flow_op(node):
671678
self._do_inference(node)
672679

673680
if node.domain not in self.opset_imports:
@@ -696,6 +703,16 @@ def process_node(self, node: ir.Node):
696703
if any(x is None for x in input_values):
697704
return None
698705

706+
if any(input.size > self._input_size_limit for input in input_values): # type: ignore[union-attr]
707+
if logger.isEnabledFor(logging.DEBUG):
708+
input_sizes = [input.size for input in input_values] # type: ignore[union-attr]
709+
logger.debug(
710+
"Skipping constant folding for op %s due to large input size: %s",
711+
node.op_type,
712+
input_sizes,
713+
)
714+
return None
715+
699716
# Filter out bfloat16 cases?
700717
def convert(av):
701718
if av.type == ir.AttributeType.TENSOR:
@@ -770,14 +787,18 @@ def fold_constants(
770787
external_data_folder: str = "",
771788
*,
772789
onnx_shape_inference: bool = False,
790+
input_size_limit: int = _DEFAULT_CONSTANT_FOLD_INPUT_SIZE_LIMIT,
791+
output_size_limit: int = _DEFAULT_CONSTANT_FOLD_OUTPUT_SIZE_LIMIT,
773792
) -> bool:
774793
"""
775794
Applies constant folding optimization to the model.
776795
Returns true iff the model was modified.
777796
"""
778797
folder = ConstantFolder(
779-
external_data_folder,
780-
onnx_shape_inference,
798+
external_data_folder=external_data_folder,
799+
shape_inference=onnx_shape_inference,
800+
input_size_limit=input_size_limit,
801+
output_size_limit=output_size_limit,
781802
)
782803
folder.visit_model(model)
783804
for op in folder.counts:

0 commit comments

Comments
 (0)