From 233b4a871a9794fe07de452b8846c259ba9009c4 Mon Sep 17 00:00:00 2001
From: Michal-Novomestsky
Date: Tue, 12 Aug 2025 19:59:32 +1000
Subject: [PATCH 1/8] added identity as alias for tensor_copy and defined
 No-Op for TensorFromScalar

---
 pytensor/tensor/basic.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/pytensor/tensor/basic.py b/pytensor/tensor/basic.py
index 931c7009b3..ca7c73578b 100644
--- a/pytensor/tensor/basic.py
+++ b/pytensor/tensor/basic.py
@@ -663,6 +663,10 @@ def c_code_cache_version(self):
 tensor_from_scalar = TensorFromScalar()
 
+@_vectorize_node.register(TensorFromScalar)
+def vectorize_tensor_from_scalar(op, node, batch_x):
+    return identity(batch_x).owner
+
 
 class ScalarFromTensor(COp):
     __props__ = ()
 
@@ -2046,6 +2050,8 @@ def register_transfer(fn):
 """Create a duplicate of `a` (with duplicated storage)"""
 tensor_copy = Elemwise(ps.identity)
 pprint.assign(tensor_copy, printing.IgnorePrinter())
+identity = tensor_copy
+pprint.assign(identity, printing.IgnorePrinter())
 
 
 class Default(Op):
@@ -4603,6 +4609,7 @@ def ix_(*args):
     "matrix_transpose",
     "default",
     "tensor_copy",
+    "identity",
     "transfer",
     "alloc",
     "identity_like",

From d31af6e8aa842e42e6fad6b63e65f0d0265b193b Mon Sep 17 00:00:00 2001
From: Michal-Novomestsky
Date: Tue, 12 Aug 2025 20:00:48 +1000
Subject: [PATCH 2/8] refactor: jacobian should use tensorize

---
 pytensor/tensor/optimize.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pytensor/tensor/optimize.py b/pytensor/tensor/optimize.py
index 2088dd99cd..05d8e86478 100644
--- a/pytensor/tensor/optimize.py
+++ b/pytensor/tensor/optimize.py
@@ -561,7 +561,7 @@ def L_op(self, inputs, outputs, output_grads):
         implicit_f = grad(inner_fx, inner_x)
 
         df_dx, *df_dtheta_columns = jacobian(
-            implicit_f, [inner_x, *inner_args], disconnected_inputs="ignore"
+            implicit_f, [inner_x, *inner_args], disconnected_inputs="ignore", vectorize=True
         )
         grad_wrt_args = implict_optimization_grads(
             df_dx=df_dx,

From 306df119f6c59c45f31738d32760012272a56b0a Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 13 Aug 2025 09:36:30 +0000
Subject: [PATCH 3/8] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 pytensor/tensor/basic.py    | 1 +
 pytensor/tensor/optimize.py | 5 ++++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/pytensor/tensor/basic.py b/pytensor/tensor/basic.py
index ca7c73578b..2ec008a72d 100644
--- a/pytensor/tensor/basic.py
+++ b/pytensor/tensor/basic.py
@@ -663,6 +663,7 @@ def c_code_cache_version(self):
 tensor_from_scalar = TensorFromScalar()
 
+
 @_vectorize_node.register(TensorFromScalar)
 def vectorize_tensor_from_scalar(op, node, batch_x):
     return identity(batch_x).owner
 
diff --git a/pytensor/tensor/optimize.py b/pytensor/tensor/optimize.py
index 05d8e86478..57a1313384 100644
--- a/pytensor/tensor/optimize.py
+++ b/pytensor/tensor/optimize.py
@@ -561,7 +561,10 @@ def L_op(self, inputs, outputs, output_grads):
         implicit_f = grad(inner_fx, inner_x)
 
         df_dx, *df_dtheta_columns = jacobian(
-            implicit_f, [inner_x, *inner_args], disconnected_inputs="ignore", vectorize=True
+            implicit_f,
+            [inner_x, *inner_args],
+            disconnected_inputs="ignore",
+            vectorize=True,
         )
         grad_wrt_args = implict_optimization_grads(
             df_dx=df_dx,

From 94c3d6d985e33d91cac5bbeff503fe4e0a723254 Mon Sep 17 00:00:00 2001
From: Michal-Novomestsky
Date: Fri, 15 Aug 2025 19:13:30 +1000
Subject: [PATCH 4/8] removed redundant pprint

---
 pytensor/tensor/basic.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pytensor/tensor/basic.py b/pytensor/tensor/basic.py
index 2ec008a72d..bf9638c473 100644
--- a/pytensor/tensor/basic.py
+++ b/pytensor/tensor/basic.py
@@ -2052,7 +2052,6 @@ def register_transfer(fn):
 tensor_copy = Elemwise(ps.identity)
 pprint.assign(tensor_copy, printing.IgnorePrinter())
 identity = tensor_copy
-pprint.assign(identity, printing.IgnorePrinter())
 
 
 class Default(Op):

From 488d0582cb53c131c812cd0b35514a0dce741e8b Mon Sep 17 00:00:00 2001
From: Michal-Novomestsky
Date: Fri, 15 Aug 2025 19:49:35 +1000
Subject: [PATCH 5/8] refactor: added vectorize=True to all jacobians

---
 pytensor/tensor/optimize.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pytensor/tensor/optimize.py b/pytensor/tensor/optimize.py
index 57a1313384..7f364c052b 100644
--- a/pytensor/tensor/optimize.py
+++ b/pytensor/tensor/optimize.py
@@ -820,7 +820,7 @@ def __init__(
         self.fgraph = FunctionGraph([variables, *args], [equations])
 
         if jac:
-            jac_wrt_x = jacobian(self.fgraph.outputs[0], self.fgraph.inputs[0])
+            jac_wrt_x = jacobian(self.fgraph.outputs[0], self.fgraph.inputs[0], vectorize=True)
             self.fgraph.add_output(atleast_2d(jac_wrt_x))
 
         self.jac = jac
@@ -900,8 +900,8 @@ def L_op(
         inner_x, *inner_args = self.fgraph.inputs
         inner_fx = self.fgraph.outputs[0]
 
-        df_dx = jacobian(inner_fx, inner_x) if not self.jac else self.fgraph.outputs[1]
-        df_dtheta_columns = jacobian(inner_fx, inner_args, disconnected_inputs="ignore")
+        df_dx = jacobian(inner_fx, inner_x, vectorize=True) if not self.jac else self.fgraph.outputs[1]
+        df_dtheta_columns = jacobian(inner_fx, inner_args, disconnected_inputs="ignore", vectorize=True)
 
         grad_wrt_args = implict_optimization_grads(
             df_dx=df_dx,

From 1227c4da13446be49cc77f0d441aecaf00fe5da0 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 15 Aug 2025 10:07:43 +0000
Subject: [PATCH 6/8] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 pytensor/tensor/optimize.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/pytensor/tensor/optimize.py b/pytensor/tensor/optimize.py
index 7f364c052b..f38f91006f 100644
--- a/pytensor/tensor/optimize.py
+++ b/pytensor/tensor/optimize.py
@@ -820,7 +820,9 @@ def __init__(
         self.fgraph = FunctionGraph([variables, *args], [equations])
 
         if jac:
-            jac_wrt_x = jacobian(self.fgraph.outputs[0], self.fgraph.inputs[0], vectorize=True)
+            jac_wrt_x = jacobian(
+                self.fgraph.outputs[0], self.fgraph.inputs[0], vectorize=True
+            )
             self.fgraph.add_output(atleast_2d(jac_wrt_x))
 
         self.jac = jac
@@ -900,8 +902,14 @@ def L_op(
         inner_x, *inner_args = self.fgraph.inputs
         inner_fx = self.fgraph.outputs[0]
 
-        df_dx = jacobian(inner_fx, inner_x, vectorize=True) if not self.jac else self.fgraph.outputs[1]
-        df_dtheta_columns = jacobian(inner_fx, inner_args, disconnected_inputs="ignore", vectorize=True)
+        df_dx = (
+            jacobian(inner_fx, inner_x, vectorize=True)
+            if not self.jac
+            else self.fgraph.outputs[1]
+        )
+        df_dtheta_columns = jacobian(
+            inner_fx, inner_args, disconnected_inputs="ignore", vectorize=True
+        )
 
         grad_wrt_args = implict_optimization_grads(
             df_dx=df_dx,

From da10fbc909164c2a1131a7526e8b3bfb8773a4d9 Mon Sep 17 00:00:00 2001
From: jessegrabowski
Date: Fri, 19 Sep 2025 18:45:11 -0500
Subject: [PATCH 7/8] Add option to vectorize jacobian in minimize/root

---
 pytensor/tensor/optimize.py | 43 +++++++++++++++++++++++++++----------
 1 file changed, 32 insertions(+), 11 deletions(-)

diff --git a/pytensor/tensor/optimize.py b/pytensor/tensor/optimize.py
index f38f91006f..25327d4807 100644
--- a/pytensor/tensor/optimize.py
+++ b/pytensor/tensor/optimize.py
@@ -484,6 +484,7 @@ def __init__(
         jac: bool = True,
         hess: bool = False,
         hessp: bool = False,
+        use_vectorized_jac: bool = False,
         optimizer_kwargs: dict | None = None,
     ):
         if not cast(TensorVariable, objective).ndim == 0:
@@ -496,6 +497,7 @@ def __init__(
             )
 
         self.fgraph = FunctionGraph([x, *args], [objective])
+        self.use_vectorized_jac = use_vectorized_jac
 
         if jac:
             grad_wrt_x = cast(
@@ -505,7 +507,12 @@ def __init__(
 
         if hess:
             hess_wrt_x = cast(
-                Variable, hessian(self.fgraph.outputs[0], self.fgraph.inputs[0])
+                Variable,
+                jacobian(
+                    self.fgraph.outputs[-1],
+                    self.fgraph.inputs[0],
+                    vectorize=use_vectorized_jac,
+                ),
             )
             self.fgraph.add_output(hess_wrt_x)
 
@@ -564,7 +571,7 @@ def L_op(self, inputs, outputs, output_grads):
             implicit_f,
             [inner_x, *inner_args],
             disconnected_inputs="ignore",
-            vectorize=True,
+            vectorize=self.use_vectorized_jac,
         )
         grad_wrt_args = implict_optimization_grads(
             df_dx=df_dx,
@@ -584,6 +591,7 @@ def minimize(
     method: str = "BFGS",
     jac: bool = True,
     hess: bool = False,
+    use_vectorized_jac: bool = False,
     optimizer_kwargs: dict | None = None,
 ) -> tuple[TensorVariable, TensorVariable]:
     """
@@ -593,18 +601,21 @@ def minimize(
     ----------
     objective : TensorVariable
         The objective function to minimize. This should be a pytensor variable representing a scalar value.
-
-    x : TensorVariable
+    x: TensorVariable
         The variable with respect to which the objective function is minimized. It must be an input to the
         computational graph of `objective`.
-
-    method : str, optional
+    method: str, optional
         The optimization method to use. Default is "BFGS". See scipy.optimize.minimize for other options.
-
-    jac : bool, optional
-        Whether to compute and use the gradient of teh objective function with respect to x for optimization.
+    jac: bool, optional
+        Whether to compute and use the gradient of the objective function with respect to x for optimization.
         Default is True.
-
+    hess: bool, optional
+        Whether to compute and use the Hessian of the objective function with respect to x for optimization.
+        Default is False. Note that some methods require this, while others do not support it.
+    use_vectorized_jac: bool, optional
+        Whether to use a vectorized graph (vmap) to compute the jacobian (and/or hessian) matrix. If False, a
+        scan will be used instead. This comes down to a memory/compute trade-off. Vectorized graphs can be faster,
+        but use more memory. Default is False.
     optimizer_kwargs
         Additional keyword arguments to pass to scipy.optimize.minimize
 
@@ -627,6 +638,7 @@ def minimize(
         method=method,
         jac=jac,
         hess=hess,
+        use_vectorized_jac=use_vectorized_jac,
         optimizer_kwargs=optimizer_kwargs,
     )
 
@@ -807,6 +819,7 @@ def __init__(
         method: str = "hybr",
         jac: bool = True,
         optimizer_kwargs: dict | None = None,
+        use_vectorized_jac: bool = False,
     ):
         if cast(TensorVariable, variables).ndim != cast(TensorVariable, equations).ndim:
             raise ValueError(
@@ -821,7 +834,9 @@ def __init__(
 
         if jac:
             jac_wrt_x = jacobian(
-                self.fgraph.outputs[0], self.fgraph.inputs[0], vectorize=True
+                self.fgraph.outputs[0],
+                self.fgraph.inputs[0],
+                vectorize=use_vectorized_jac,
             )
             self.fgraph.add_output(atleast_2d(jac_wrt_x))
 
@@ -928,6 +943,7 @@ def root(
     variables: TensorVariable,
     method: str = "hybr",
     jac: bool = True,
+    use_vectorized_jac: bool = False,
     optimizer_kwargs: dict | None = None,
 ) -> tuple[TensorVariable, TensorVariable]:
     """
@@ -946,6 +962,10 @@ def root(
     jac : bool, optional
         Whether to compute and use the Jacobian of the `equations` with respect to `variables`. Default is
         True. Most methods require this.
+    use_vectorized_jac: bool, optional
+        Whether to use a vectorized graph (vmap) to compute the jacobian matrix. If False, a scan will be used instead.
+        This comes down to a memory/compute trade-off. Vectorized graphs can be faster, but use more memory.
+        Default is False.
     optimizer_kwargs : dict, optional
         Additional keyword arguments to pass to `scipy.optimize.root`.
 
@@ -969,6 +989,7 @@ def root(
         method=method,
         jac=jac,
         optimizer_kwargs=optimizer_kwargs,
+        use_vectorized_jac=use_vectorized_jac,
     )
 
     solution, success = cast(

From a2e3634ec995d5cc694eadfc45c8ec5a76356ecb Mon Sep 17 00:00:00 2001
From: jessegrabowski
Date: Sat, 20 Sep 2025 13:25:33 -0400
Subject: [PATCH 8/8] pre-commit

---
 pytensor/tensor/optimize.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pytensor/tensor/optimize.py b/pytensor/tensor/optimize.py
index 25327d4807..7d4bdade77 100644
--- a/pytensor/tensor/optimize.py
+++ b/pytensor/tensor/optimize.py
@@ -7,7 +7,7 @@
 
 import pytensor.scalar as ps
 from pytensor.compile.function import function
-from pytensor.gradient import grad, hessian, jacobian
+from pytensor.gradient import grad, jacobian
 from pytensor.graph.basic import Apply, Constant
 from pytensor.graph.fg import FunctionGraph
 from pytensor.graph.op import ComputeMapType, HasInnerGraph, Op, StorageMapType
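
Not part of the patch series above: a minimal usage sketch of the use_vectorized_jac flag introduced in PATCH 7, assuming these patches are applied on top of pytensor.tensor.optimize. The quadratic objective and variable names are illustrative only.

# Hypothetical example, assuming the patches above are applied.
import numpy as np

import pytensor
import pytensor.tensor as pt
from pytensor.tensor.optimize import minimize

x = pt.vector("x")
a = pt.vector("a")

# Simple quadratic objective whose minimum lies at x = a.
objective = ((x - a) ** 2).sum()

# use_vectorized_jac=True builds the jacobian graph with vmap instead of a
# scan: potentially faster, at the cost of extra memory (per the docstring).
x_star, success = minimize(objective, x, jac=True, use_vectorized_jac=True)

fn = pytensor.function([x, a], [x_star, success])
print(fn(np.zeros(3), np.array([1.0, 2.0, 3.0])))  # solution is approximately [1., 2., 3.]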