
Commit b9f9d1c

RyanJDick authored and hipsterusername committed
Increase the VAE decode memory estimates to account for memory that is reserved by the memory allocator but not allocated, and to generally be more conservative.
1 parent 7567ee2 commit b9f9d1c
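
The extra headroom being targeted is the gap between memory that PyTorch's caching allocator has reserved and memory that is actually allocated. A minimal sketch (not part of this commit) of how that gap can be observed with the standard CUDA memory counters:

    import torch

    # The caching allocator keeps freed blocks reserved, so reserved memory can
    # exceed allocated memory; the higher estimates leave room for that gap.
    if torch.cuda.is_available():
        x = torch.empty((4096, 4096), device="cuda")
        del x  # freed, but the block stays reserved by the allocator
        print(f"allocated: {torch.cuda.memory_allocated() / 2**20:.0f} MiB")
        print(f"reserved:  {torch.cuda.memory_reserved() / 2**20:.0f} MiB")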

3 files changed: +4 −16 lines changed

invokeai/app/invocations/flux_vae_decode.py

Lines changed: 1 addition & 6 deletions
@@ -41,16 +41,11 @@ class FluxVaeDecodeInvocation(BaseInvocation, WithMetadata, WithBoard):
 
     def _estimate_working_memory(self, latents: torch.Tensor, vae: AutoEncoder) -> int:
         """Estimate the working memory required by the invocation in bytes."""
-        # It was found experimentally that the peak working memory scales linearly with the number of pixels and the
-        # element size (precision).
         out_h = LATENT_SCALE_FACTOR * latents.shape[-2]
         out_w = LATENT_SCALE_FACTOR * latents.shape[-1]
         element_size = next(vae.parameters()).element_size()
-        scaling_constant = 1090  # Determined experimentally.
+        scaling_constant = 2200  # Determined experimentally.
         working_memory = out_h * out_w * element_size * scaling_constant
-
-        # We add a 20% buffer to the working memory estimate to be safe.
-        working_memory = working_memory * 1.2
         return int(working_memory)
 
     def _vae_decode(self, vae_info: LoadedModel, latents: torch.Tensor) -> Image.Image:
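
For scale, a rough back-of-the-envelope check (not part of the diff) of what the new constant implies for a typical FLUX decode, assuming a 1024×1024 output and a bf16 VAE (element size 2 bytes):

    # Hypothetical numbers, assuming a 1024x1024 decode with a bf16 VAE.
    out_h, out_w = 1024, 1024
    element_size = 2
    scaling_constant = 2200
    working_memory = out_h * out_w * element_size * scaling_constant
    print(f"{working_memory / 2**30:.1f} GiB")  # ~4.3 GiB, vs ~2.6 GiB with the old 1090 constant plus the 20% buffer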

invokeai/app/invocations/latents_to_image.py

Lines changed: 2 additions & 4 deletions
@@ -60,7 +60,7 @@ def _estimate_working_memory(
         # It was found experimentally that the peak working memory scales linearly with the number of pixels and the
         # element size (precision). This estimate is accurate for both SD1 and SDXL.
         element_size = 4 if self.fp32 else 2
-        scaling_constant = 960  # Determined experimentally.
+        scaling_constant = 2200  # Determined experimentally.
 
         if use_tiling:
             tile_size = self.tile_size
@@ -84,9 +84,7 @@ def _estimate_working_memory(
             # If we are running in FP32, then we should account for the likely increase in model size (~250MB).
             working_memory += 250 * 2**20
 
-        # We add 20% to the working memory estimate to be safe.
-        working_memory = int(working_memory * 1.2)
-        return working_memory
+        return int(working_memory)
 
     @torch.no_grad()
     def invoke(self, context: InvocationContext) -> ImageOutput:
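
The SD1/SDXL path differs only in how the element size is chosen and in the extra allowance for FP32 model weights. A small illustration (assumed 1024×1024 output, non-tiled; the numbers are not from the commit):

    # Hypothetical non-tiled estimate after this change, for a 1024x1024 decode.
    out_h = out_w = 1024
    for fp32 in (False, True):
        element_size = 4 if fp32 else 2
        working_memory = out_h * out_w * element_size * 2200
        if fp32:
            working_memory += 250 * 2**20  # likely increase in model size when running in FP32
        print(f"fp32={fp32}: {working_memory / 2**30:.1f} GiB")
    # fp32=False: ~4.3 GiB; fp32=True: ~8.8 GiB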

invokeai/app/invocations/sd3_latents_to_image.py

Lines changed: 1 addition & 6 deletions
@@ -43,16 +43,11 @@ class SD3LatentsToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
 
     def _estimate_working_memory(self, latents: torch.Tensor, vae: AutoencoderKL) -> int:
         """Estimate the working memory required by the invocation in bytes."""
-        # It was found experimentally that the peak working memory scales linearly with the number of pixels and the
-        # element size (precision).
         out_h = LATENT_SCALE_FACTOR * latents.shape[-2]
         out_w = LATENT_SCALE_FACTOR * latents.shape[-1]
         element_size = next(vae.parameters()).element_size()
-        scaling_constant = 1230  # Determined experimentally.
+        scaling_constant = 2200  # Determined experimentally.
         working_memory = out_h * out_w * element_size * scaling_constant
-
-        # We add a 20% buffer to the working memory estimate to be safe.
-        working_memory = working_memory * 1.2
         return int(working_memory)
 
     @torch.no_grad()
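
Across the three decoders the change amounts to roughly a 1.5–1.9x increase in the effective per-pixel budget, since the removed 20% buffer is folded into the single larger constant. A quick comparison (old constants taken from the diff; the rest is illustrative):

    # Effective bytes-per-pixel-per-element before (old constant * 1.2 buffer) vs. after.
    old_constants = {"flux_vae_decode": 1090, "latents_to_image": 960, "sd3_latents_to_image": 1230}
    for name, old in old_constants.items():
        effective_old = old * 1.2
        print(f"{name}: {effective_old:.0f} -> 2200 ({2200 / effective_old:.2f}x)")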
