
Commit b9f9d1c

RyanJDick authored and hipsterusername committed
Increase the VAE decode memory estimates to account for memory that is reserved by the memory allocator but not allocated, and to generally be more conservative.
1 parent 7567ee2 commit b9f9d1c
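
The extra headroom being targeted is the gap between memory that PyTorch's caching allocator has reserved and memory that is actually allocated. A minimal sketch (not part of this commit) of how that gap can be observed with the standard CUDA memory counters:

    import torch

    # The caching allocator keeps freed blocks reserved, so reserved memory can
    # exceed allocated memory; the higher estimates leave room for that gap.
    if torch.cuda.is_available():
        x = torch.empty((4096, 4096), device="cuda")
        del x  # freed, but the block stays reserved by the allocator
        print(f"allocated: {torch.cuda.memory_allocated() / 2**20:.0f} MiB")
        print(f"reserved:  {torch.cuda.memory_reserved() / 2**20:.0f} MiB")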

3 files changed: +4 −16 lines changed

invokeai/app/invocations/flux_vae_decode.py

Lines changed: 1 addition & 6 deletions
@@ -41,16 +41,11 @@ class FluxVaeDecodeInvocation(BaseInvocation, WithMetadata, WithBoard):
 
     def _estimate_working_memory(self, latents: torch.Tensor, vae: AutoEncoder) -> int:
         """Estimate the working memory required by the invocation in bytes."""
-        # It was found experimentally that the peak working memory scales linearly with the number of pixels and the
-        # element size (precision).
         out_h = LATENT_SCALE_FACTOR * latents.shape[-2]
         out_w = LATENT_SCALE_FACTOR * latents.shape[-1]
         element_size = next(vae.parameters()).element_size()
-        scaling_constant = 1090  # Determined experimentally.
+        scaling_constant = 2200  # Determined experimentally.
         working_memory = out_h * out_w * element_size * scaling_constant
-
-        # We add a 20% buffer to the working memory estimate to be safe.
-        working_memory = working_memory * 1.2
         return int(working_memory)
 
     def _vae_decode(self, vae_info: LoadedModel, latents: torch.Tensor) -> Image.Image:
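
For scale, a rough back-of-the-envelope check (not part of the diff) of what the new constant implies for a typical FLUX decode, assuming a 1024×1024 output and a bf16 VAE (element size 2 bytes):

    # Hypothetical numbers, assuming a 1024x1024 decode with a bf16 VAE.
    out_h, out_w = 1024, 1024
    element_size = 2
    scaling_constant = 2200
    working_memory = out_h * out_w * element_size * scaling_constant
    print(f"{working_memory / 2**30:.1f} GiB")  # ~4.3 GiB, vs ~2.6 GiB with the old 1090 constant plus the 20% buffer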

invokeai/app/invocations/latents_to_image.py

Lines changed: 2 additions & 4 deletions
@@ -60,7 +60,7 @@ def _estimate_working_memory(
         # It was found experimentally that the peak working memory scales linearly with the number of pixels and the
         # element size (precision). This estimate is accurate for both SD1 and SDXL.
         element_size = 4 if self.fp32 else 2
-        scaling_constant = 960  # Determined experimentally.
+        scaling_constant = 2200  # Determined experimentally.
 
         if use_tiling:
             tile_size = self.tile_size
@@ -84,9 +84,7 @@ def _estimate_working_memory(
             # If we are running in FP32, then we should account for the likely increase in model size (~250MB).
             working_memory += 250 * 2**20
 
-        # We add 20% to the working memory estimate to be safe.
-        working_memory = int(working_memory * 1.2)
-        return working_memory
+        return int(working_memory)
 
     @torch.no_grad()
     def invoke(self, context: InvocationContext) -> ImageOutput:
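
The SD1/SDXL path differs only in how the element size is chosen and in the extra allowance for FP32 model weights. A small illustration (assumed 1024×1024 output, non-tiled; the numbers are not from the commit):

    # Hypothetical non-tiled estimate after this change, for a 1024x1024 decode.
    out_h = out_w = 1024
    for fp32 in (False, True):
        element_size = 4 if fp32 else 2
        working_memory = out_h * out_w * element_size * 2200
        if fp32:
            working_memory += 250 * 2**20  # likely increase in model size when running in FP32
        print(f"fp32={fp32}: {working_memory / 2**30:.1f} GiB")
    # fp32=False: ~4.3 GiB; fp32=True: ~8.8 GiB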

invokeai/app/invocations/sd3_latents_to_image.py

Lines changed: 1 addition & 6 deletions
@@ -43,16 +43,11 @@ class SD3LatentsToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
 
     def _estimate_working_memory(self, latents: torch.Tensor, vae: AutoencoderKL) -> int:
         """Estimate the working memory required by the invocation in bytes."""
-        # It was found experimentally that the peak working memory scales linearly with the number of pixels and the
-        # element size (precision).
         out_h = LATENT_SCALE_FACTOR * latents.shape[-2]
         out_w = LATENT_SCALE_FACTOR * latents.shape[-1]
         element_size = next(vae.parameters()).element_size()
-        scaling_constant = 1230  # Determined experimentally.
+        scaling_constant = 2200  # Determined experimentally.
         working_memory = out_h * out_w * element_size * scaling_constant
-
-        # We add a 20% buffer to the working memory estimate to be safe.
-        working_memory = working_memory * 1.2
         return int(working_memory)
 
     @torch.no_grad()
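
Across the three decoders the change amounts to roughly a 1.5–1.9x increase in the effective per-pixel budget, since the removed 20% buffer is folded into the single larger constant. A quick comparison (old constants taken from the diff; the rest is illustrative):

    # Effective bytes-per-pixel-per-element before (old constant * 1.2 buffer) vs. after.
    old_constants = {"flux_vae_decode": 1090, "latents_to_image": 960, "sd3_latents_to_image": 1230}
    for name, old in old_constants.items():
        effective_old = old * 1.2
        print(f"{name}: {effective_old:.0f} -> 2200 ({2200 / effective_old:.2f}x)")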
