From 233b4a871a9794fe07de452b8846c259ba9009c4 Mon Sep 17 00:00:00 2001
From: Michal-Novomestsky
Date: Tue, 12 Aug 2025 19:59:32 +1000
Subject: [PATCH 1/8] added identity as alias for tensor_copy and defined
 No-Op for TensorFromScalar

---
 pytensor/tensor/basic.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/pytensor/tensor/basic.py b/pytensor/tensor/basic.py
index 931c7009b3..ca7c73578b 100644
--- a/pytensor/tensor/basic.py
+++ b/pytensor/tensor/basic.py
@@ -663,6 +663,10 @@ def c_code_cache_version(self):
 tensor_from_scalar = TensorFromScalar()
 
+@_vectorize_node.register(TensorFromScalar)
+def vectorize_tensor_from_scalar(op, node, batch_x):
+    return identity(batch_x).owner
+
 
 class ScalarFromTensor(COp):
     __props__ = ()
 
@@ -2046,6 +2050,8 @@ def register_transfer(fn):
 """Create a duplicate of `a` (with duplicated storage)"""
 tensor_copy = Elemwise(ps.identity)
 pprint.assign(tensor_copy, printing.IgnorePrinter())
+identity = tensor_copy
+pprint.assign(identity, printing.IgnorePrinter())
 
 
 class Default(Op):
@@ -4603,6 +4609,7 @@ def ix_(*args):
     "matrix_transpose",
     "default",
     "tensor_copy",
+    "identity",
     "transfer",
     "alloc",
     "identity_like",

From d31af6e8aa842e42e6fad6b63e65f0d0265b193b Mon Sep 17 00:00:00 2001
From: Michal-Novomestsky
Date: Tue, 12 Aug 2025 20:00:48 +1000
Subject: [PATCH 2/8] refactor: jacobian should use tensorize

---
 pytensor/tensor/optimize.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pytensor/tensor/optimize.py b/pytensor/tensor/optimize.py
index 2088dd99cd..05d8e86478 100644
--- a/pytensor/tensor/optimize.py
+++ b/pytensor/tensor/optimize.py
@@ -561,7 +561,7 @@ def L_op(self, inputs, outputs, output_grads):
         implicit_f = grad(inner_fx, inner_x)
 
         df_dx, *df_dtheta_columns = jacobian(
-            implicit_f, [inner_x, *inner_args], disconnected_inputs="ignore"
+            implicit_f, [inner_x, *inner_args], disconnected_inputs="ignore", vectorize=True
         )
         grad_wrt_args = implict_optimization_grads(
             df_dx=df_dx,

From 306df119f6c59c45f31738d32760012272a56b0a Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 13 Aug 2025 09:36:30 +0000
Subject: [PATCH 3/8] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 pytensor/tensor/basic.py    | 1 +
 pytensor/tensor/optimize.py | 5 ++++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/pytensor/tensor/basic.py b/pytensor/tensor/basic.py
index ca7c73578b..2ec008a72d 100644
--- a/pytensor/tensor/basic.py
+++ b/pytensor/tensor/basic.py
@@ -663,6 +663,7 @@ def c_code_cache_version(self):
 tensor_from_scalar = TensorFromScalar()
 
+
 @_vectorize_node.register(TensorFromScalar)
 def vectorize_tensor_from_scalar(op, node, batch_x):
     return identity(batch_x).owner
 
diff --git a/pytensor/tensor/optimize.py b/pytensor/tensor/optimize.py
index 05d8e86478..57a1313384 100644
--- a/pytensor/tensor/optimize.py
+++ b/pytensor/tensor/optimize.py
@@ -561,7 +561,10 @@ def L_op(self, inputs, outputs, output_grads):
         implicit_f = grad(inner_fx, inner_x)
 
         df_dx, *df_dtheta_columns = jacobian(
-            implicit_f, [inner_x, *inner_args], disconnected_inputs="ignore", vectorize=True
+            implicit_f,
+            [inner_x, *inner_args],
+            disconnected_inputs="ignore",
+            vectorize=True,
         )
         grad_wrt_args = implict_optimization_grads(
             df_dx=df_dx,

From 94c3d6d985e33d91cac5bbeff503fe4e0a723254 Mon Sep 17 00:00:00 2001
From: Michal-Novomestsky
Date: Fri, 15 Aug 2025 19:13:30 +1000
Subject: [PATCH 4/8] removed redundant pprint

---
 pytensor/tensor/basic.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pytensor/tensor/basic.py b/pytensor/tensor/basic.py
index 2ec008a72d..bf9638c473 100644
--- a/pytensor/tensor/basic.py
+++ b/pytensor/tensor/basic.py
@@ -2052,7 +2052,6 @@ def register_transfer(fn):
 tensor_copy = Elemwise(ps.identity)
 pprint.assign(tensor_copy, printing.IgnorePrinter())
 identity = tensor_copy
-pprint.assign(identity, printing.IgnorePrinter())
 
 
 class Default(Op):

From 488d0582cb53c131c812cd0b35514a0dce741e8b Mon Sep 17 00:00:00 2001
From: Michal-Novomestsky
Date: Fri, 15 Aug 2025 19:49:35 +1000
Subject: [PATCH 5/8] refactor: added vectorize=True to all jacobians

---
 pytensor/tensor/optimize.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pytensor/tensor/optimize.py b/pytensor/tensor/optimize.py
index 57a1313384..7f364c052b 100644
--- a/pytensor/tensor/optimize.py
+++ b/pytensor/tensor/optimize.py
@@ -820,7 +820,7 @@ def __init__(
         self.fgraph = FunctionGraph([variables, *args], [equations])
 
         if jac:
-            jac_wrt_x = jacobian(self.fgraph.outputs[0], self.fgraph.inputs[0])
+            jac_wrt_x = jacobian(self.fgraph.outputs[0], self.fgraph.inputs[0], vectorize=True)
             self.fgraph.add_output(atleast_2d(jac_wrt_x))
 
         self.jac = jac
@@ -900,8 +900,8 @@ def L_op(
         inner_x, *inner_args = self.fgraph.inputs
         inner_fx = self.fgraph.outputs[0]
 
-        df_dx = jacobian(inner_fx, inner_x) if not self.jac else self.fgraph.outputs[1]
-        df_dtheta_columns = jacobian(inner_fx, inner_args, disconnected_inputs="ignore")
+        df_dx = jacobian(inner_fx, inner_x, vectorize=True) if not self.jac else self.fgraph.outputs[1]
+        df_dtheta_columns = jacobian(inner_fx, inner_args, disconnected_inputs="ignore", vectorize=True)
 
         grad_wrt_args = implict_optimization_grads(
             df_dx=df_dx,

From 1227c4da13446be49cc77f0d441aecaf00fe5da0 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 15 Aug 2025 10:07:43 +0000
Subject: [PATCH 6/8] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 pytensor/tensor/optimize.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/pytensor/tensor/optimize.py b/pytensor/tensor/optimize.py
index 7f364c052b..f38f91006f 100644
--- a/pytensor/tensor/optimize.py
+++ b/pytensor/tensor/optimize.py
@@ -820,7 +820,9 @@ def __init__(
         self.fgraph = FunctionGraph([variables, *args], [equations])
 
         if jac:
-            jac_wrt_x = jacobian(self.fgraph.outputs[0], self.fgraph.inputs[0], vectorize=True)
+            jac_wrt_x = jacobian(
+                self.fgraph.outputs[0], self.fgraph.inputs[0], vectorize=True
+            )
             self.fgraph.add_output(atleast_2d(jac_wrt_x))
 
         self.jac = jac
@@ -900,8 +902,14 @@ def L_op(
         inner_x, *inner_args = self.fgraph.inputs
         inner_fx = self.fgraph.outputs[0]
 
-        df_dx = jacobian(inner_fx, inner_x, vectorize=True) if not self.jac else self.fgraph.outputs[1]
-        df_dtheta_columns = jacobian(inner_fx, inner_args, disconnected_inputs="ignore", vectorize=True)
+        df_dx = (
+            jacobian(inner_fx, inner_x, vectorize=True)
+            if not self.jac
+            else self.fgraph.outputs[1]
+        )
+        df_dtheta_columns = jacobian(
+            inner_fx, inner_args, disconnected_inputs="ignore", vectorize=True
+        )
 
         grad_wrt_args = implict_optimization_grads(
             df_dx=df_dx,

From da10fbc909164c2a1131a7526e8b3bfb8773a4d9 Mon Sep 17 00:00:00 2001
From: jessegrabowski
Date: Fri, 19 Sep 2025 18:45:11 -0500
Subject: [PATCH 7/8] Add option to vectorize jacobian in minimize/root

---
 pytensor/tensor/optimize.py | 43 +++++++++++++++++++++++++++----------
 1 file changed, 32 insertions(+), 11 deletions(-)

diff --git a/pytensor/tensor/optimize.py b/pytensor/tensor/optimize.py
index f38f91006f..25327d4807 100644
--- a/pytensor/tensor/optimize.py
+++ b/pytensor/tensor/optimize.py
@@ -484,6 +484,7 @@ def __init__(
         jac: bool = True,
         hess: bool = False,
         hessp: bool = False,
+        use_vectorized_jac: bool = False,
         optimizer_kwargs: dict | None = None,
     ):
         if not cast(TensorVariable, objective).ndim == 0:
@@ -496,6 +497,7 @@ def __init__(
             )
 
         self.fgraph = FunctionGraph([x, *args], [objective])
+        self.use_vectorized_jac = use_vectorized_jac
 
         if jac:
             grad_wrt_x = cast(
@@ -505,7 +507,12 @@ def __init__(
 
         if hess:
             hess_wrt_x = cast(
-                Variable, hessian(self.fgraph.outputs[0], self.fgraph.inputs[0])
+                Variable,
+                jacobian(
+                    self.fgraph.outputs[-1],
+                    self.fgraph.inputs[0],
+                    vectorize=use_vectorized_jac,
+                ),
             )
             self.fgraph.add_output(hess_wrt_x)
 
@@ -564,7 +571,7 @@ def L_op(self, inputs, outputs, output_grads):
             implicit_f,
             [inner_x, *inner_args],
             disconnected_inputs="ignore",
-            vectorize=True,
+            vectorize=self.use_vectorized_jac,
         )
         grad_wrt_args = implict_optimization_grads(
             df_dx=df_dx,
@@ -584,6 +591,7 @@ def minimize(
     method: str = "BFGS",
     jac: bool = True,
     hess: bool = False,
+    use_vectorized_jac: bool = False,
     optimizer_kwargs: dict | None = None,
 ) -> tuple[TensorVariable, TensorVariable]:
     """
@@ -593,18 +601,21 @@ def minimize(
     ----------
     objective : TensorVariable
         The objective function to minimize. This should be a pytensor variable representing a scalar value.
-
-    x : TensorVariable
+    x: TensorVariable
         The variable with respect to which the objective function is minimized. It must be an input to the
         computational graph of `objective`.
-
-    method : str, optional
+    method: str, optional
         The optimization method to use. Default is "BFGS". See scipy.optimize.minimize for other options.
-
-    jac : bool, optional
-        Whether to compute and use the gradient of teh objective function with respect to x for optimization.
+    jac: bool, optional
+        Whether to compute and use the gradient of the objective function with respect to x for optimization.
         Default is True.
-
+    hess: bool, optional
+        Whether to compute and use the Hessian of the objective function with respect to x for optimization.
+        Default is False. Note that some methods require this, while others do not support it.
+    use_vectorized_jac: bool, optional
+        Whether to use a vectorized graph (vmap) to compute the jacobian (and/or hessian) matrix. If False, a
+        scan will be used instead. This comes down to a memory/compute trade-off. Vectorized graphs can be faster,
+        but use more memory. Default is False.
     optimizer_kwargs
         Additional keyword arguments to pass to scipy.optimize.minimize
 
@@ -627,6 +638,7 @@ def minimize(
         method=method,
         jac=jac,
         hess=hess,
+        use_vectorized_jac=use_vectorized_jac,
         optimizer_kwargs=optimizer_kwargs,
     )
 
@@ -807,6 +819,7 @@ def __init__(
         method: str = "hybr",
         jac: bool = True,
         optimizer_kwargs: dict | None = None,
+        use_vectorized_jac: bool = False,
     ):
         if cast(TensorVariable, variables).ndim != cast(TensorVariable, equations).ndim:
             raise ValueError(
@@ -821,7 +834,9 @@ def __init__(
 
         if jac:
             jac_wrt_x = jacobian(
-                self.fgraph.outputs[0], self.fgraph.inputs[0], vectorize=True
+                self.fgraph.outputs[0],
+                self.fgraph.inputs[0],
+                vectorize=use_vectorized_jac,
             )
             self.fgraph.add_output(atleast_2d(jac_wrt_x))
 
@@ -928,6 +943,7 @@ def root(
     variables: TensorVariable,
     method: str = "hybr",
     jac: bool = True,
+    use_vectorized_jac: bool = False,
     optimizer_kwargs: dict | None = None,
 ) -> tuple[TensorVariable, TensorVariable]:
     """
@@ -946,6 +962,10 @@ def root(
     jac : bool, optional
         Whether to compute and use the Jacobian of the `equations` with respect to `variables`. Default is
         True. Most methods require this.
+    use_vectorized_jac: bool, optional
+        Whether to use a vectorized graph (vmap) to compute the jacobian matrix. If False, a scan will be used instead.
+        This comes down to a memory/compute trade-off. Vectorized graphs can be faster, but use more memory.
+        Default is False.
     optimizer_kwargs : dict, optional
         Additional keyword arguments to pass to `scipy.optimize.root`.
 
@@ -969,6 +989,7 @@ def root(
         method=method,
         jac=jac,
         optimizer_kwargs=optimizer_kwargs,
+        use_vectorized_jac=use_vectorized_jac,
     )
 
     solution, success = cast(

From a2e3634ec995d5cc694eadfc45c8ec5a76356ecb Mon Sep 17 00:00:00 2001
From: jessegrabowski
Date: Sat, 20 Sep 2025 13:25:33 -0400
Subject: [PATCH 8/8] pre-commit

---
 pytensor/tensor/optimize.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pytensor/tensor/optimize.py b/pytensor/tensor/optimize.py
index 25327d4807..7d4bdade77 100644
--- a/pytensor/tensor/optimize.py
+++ b/pytensor/tensor/optimize.py
@@ -7,7 +7,7 @@
 
 import pytensor.scalar as ps
 from pytensor.compile.function import function
-from pytensor.gradient import grad, hessian, jacobian
+from pytensor.gradient import grad, jacobian
 from pytensor.graph.basic import Apply, Constant
 from pytensor.graph.fg import FunctionGraph
 from pytensor.graph.op import ComputeMapType, HasInnerGraph, Op, StorageMapType
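
Not part of the patch series above: a minimal usage sketch of the use_vectorized_jac flag introduced in PATCH 7, assuming these patches are applied on top of pytensor.tensor.optimize. The quadratic objective and variable names are illustrative only.

# Hypothetical example, assuming the patches above are applied.
import numpy as np

import pytensor
import pytensor.tensor as pt
from pytensor.tensor.optimize import minimize

x = pt.vector("x")
a = pt.vector("a")

# Simple quadratic objective whose minimum lies at x = a.
objective = ((x - a) ** 2).sum()

# use_vectorized_jac=True builds the jacobian graph with vmap instead of a
# scan: potentially faster, at the cost of extra memory (per the docstring).
x_star, success = minimize(objective, x, jac=True, use_vectorized_jac=True)

fn = pytensor.function([x, a], [x_star, success])
print(fn(np.zeros(3), np.array([1.0, 2.0, 3.0])))  # solution is approximately [1., 2., 3.]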