|  | 
|  | 1 | +<!--Copyright 2025 The HuggingFace Team. All rights reserved. | 
|  | 2 | +
 | 
|  | 3 | +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with | 
|  | 4 | +the License. You may obtain a copy of the License at | 
|  | 5 | +
 | 
|  | 6 | +http://www.apache.org/licenses/LICENSE-2.0 | 
|  | 7 | +
 | 
|  | 8 | +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on | 
|  | 9 | +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the | 
|  | 10 | +specific language governing permissions and limitations under the License. | 
|  | 11 | +--> | 
|  | 12 | + | 
|  | 13 | +# Batch inference | 
|  | 14 | + | 
|  | 15 | +Batch inference processes multiple prompts at once to increase throughput. It is more efficient because a batch keeps the GPU fully utilized, whereas processing a single prompt at a time leaves much of the GPU idle. | 
|  | 16 | + | 
|  | 17 | +The trade-offs are higher latency, because you must wait for the entire batch to complete, and higher GPU memory usage, which grows with the batch size. | 
|  | 18 | + | 
|  | 19 | +<hfoptions id="usage"> | 
|  | 20 | +<hfoption id="text-to-image"> | 
|  | 21 | + | 
|  | 22 | +For text-to-image, pass a list of prompts to the pipeline. | 
|  | 23 | + | 
|  | 24 | +```py | 
|  | 25 | +import torch | 
|  |  | +import matplotlib.pyplot as plt | 
|  | 26 | +from diffusers import DiffusionPipeline | 
|  | 27 | + | 
|  | 28 | +pipeline = DiffusionPipeline.from_pretrained( | 
|  | 29 | +    "stabilityai/stable-diffusion-xl-base-1.0", | 
|  | 30 | +    torch_dtype=torch.float16 | 
|  | 31 | +).to("cuda") | 
|  | 32 | + | 
|  | 33 | +prompts = [ | 
|  | 34 | +    "cinematic photo of A beautiful sunset over mountains, 35mm photograph, film, professional, 4k, highly detailed", | 
|  | 35 | +    "cinematic film still of a cat basking in the sun on a roof in Turkey, highly detailed, high budget hollywood movie, cinemascope, moody, epic, gorgeous, film grain", | 
|  | 36 | +    "pixel-art a cozy coffee shop interior, low-res, blocky, pixel art style, 8-bit graphics" | 
|  | 37 | +] | 
|  | 38 | + | 
|  | 39 | +images = pipeline( | 
|  | 40 | +    prompt=prompts, | 
|  | 41 | +).images | 
|  | 42 | + | 
|  | 43 | +fig, axes = plt.subplots(2, 2, figsize=(12, 12)) | 
|  | 44 | +axes = axes.flatten() | 
|  | 45 | + | 
|  | 46 | +for i, image in enumerate(images): | 
|  | 47 | +    axes[i].imshow(image) | 
|  | 48 | +    axes[i].set_title(f"Image {i+1}") | 
|  | 49 | +    axes[i].axis('off') | 
|  | 50 | + | 
|  | 51 | +plt.tight_layout() | 
|  | 52 | +plt.show() | 
|  | 53 | +``` | 
|  | 54 | + | 
|  | 55 | +To generate multiple variations of one prompt, use the `num_images_per_prompt` argument. | 
|  | 56 | + | 
|  | 57 | +```py | 
|  | 58 | +import torch | 
|  | 59 | +import matplotlib.pyplot as plt | 
|  | 60 | +from diffusers import DiffusionPipeline | 
|  | 61 | + | 
|  | 62 | +pipeline = DiffusionPipeline.from_pretrained( | 
|  | 63 | +    "stabilityai/stable-diffusion-xl-base-1.0", | 
|  | 64 | +    torch_dtype=torch.float16 | 
|  | 65 | +).to("cuda") | 
|  | 66 | + | 
|  | 67 | +images = pipeline( | 
|  | 68 | +    prompt="pixel-art a cozy coffee shop interior, low-res, blocky, pixel art style, 8-bit graphics", | 
|  | 69 | +    num_images_per_prompt=4 | 
|  | 70 | +).images | 
|  | 71 | + | 
|  | 72 | +fig, axes = plt.subplots(2, 2, figsize=(12, 12)) | 
|  | 73 | +axes = axes.flatten() | 
|  | 74 | + | 
|  | 75 | +for i, image in enumerate(images): | 
|  | 76 | +    axes[i].imshow(image) | 
|  | 77 | +    axes[i].set_title(f"Image {i+1}") | 
|  | 78 | +    axes[i].axis('off') | 
|  | 79 | + | 
|  | 80 | +plt.tight_layout() | 
|  | 81 | +plt.show() | 
|  | 82 | +``` | 
|  | 83 | + | 
|  | 84 | +Combine both approaches to generate different variations of different prompts. | 
|  | 85 | + | 
|  | 86 | +```py | 
|  | 87 | +images = pipeline( | 
|  | 88 | +    prompt=prompts, | 
|  | 89 | +    num_images_per_prompt=2, | 
|  | 90 | +).images | 
|  | 91 | + | 
|  | 92 | +fig, axes = plt.subplots(2, 3, figsize=(18, 12)) | 
|  | 93 | +axes = axes.flatten() | 
|  | 94 | + | 
|  | 95 | +for i, image in enumerate(images): | 
|  | 96 | +    axes[i].imshow(image) | 
|  | 97 | +    axes[i].set_title(f"Image {i+1}") | 
|  | 98 | +    axes[i].axis('off') | 
|  | 99 | + | 
|  | 100 | +plt.tight_layout() | 
|  | 101 | +plt.show() | 
|  | 102 | +``` | 
|  | 103 | + | 
|  | 104 | +</hfoption> | 
|  | 105 | +<hfoption id="image-to-image"> | 
|  | 106 | + | 
|  | 107 | +For image-to-image, pass a list of input images and prompts to the pipeline. | 
|  | 108 | + | 
|  | 109 | +```py | 
|  | 110 | +import torch | 
|  |  | +import matplotlib.pyplot as plt | 
|  | 111 | +from diffusers.utils import load_image | 
|  | 112 | +from diffusers import AutoPipelineForImage2Image | 
|  | 113 | + | 
|  | 114 | +pipeline = AutoPipelineForImage2Image.from_pretrained( | 
|  | 115 | +    "stabilityai/stable-diffusion-xl-base-1.0", | 
|  | 116 | +    torch_dtype=torch.float16 | 
|  | 117 | +).to("cuda") | 
|  | 118 | + | 
|  | 119 | +input_images = [ | 
|  | 120 | +    load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png"), | 
|  | 121 | +    load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/cat.png"), | 
|  | 122 | +    load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/detail-prompt.png") | 
|  | 123 | +] | 
|  | 124 | + | 
|  | 125 | +prompts = [ | 
|  | 126 | +    "cinematic photo of a beautiful sunset over mountains, 35mm photograph, film, professional, 4k, highly detailed", | 
|  | 127 | +    "cinematic film still of a cat basking in the sun on a roof in Turkey, highly detailed, high budget hollywood movie, cinemascope, moody, epic, gorgeous, film grain", | 
|  | 128 | +    "pixel-art a cozy coffee shop interior, low-res, blocky, pixel art style, 8-bit graphics" | 
|  | 129 | +] | 
|  | 130 | + | 
|  | 131 | +images = pipeline( | 
|  | 132 | +    prompt=prompts, | 
|  | 133 | +    image=input_images, | 
|  | 134 | +    guidance_scale=8.0, | 
|  | 135 | +    strength=0.5 | 
|  | 136 | +).images | 
|  | 137 | + | 
|  | 138 | +fig, axes = plt.subplots(2, 2, figsize=(12, 12)) | 
|  | 139 | +axes = axes.flatten() | 
|  | 140 | + | 
|  | 141 | +for i, image in enumerate(images): | 
|  | 142 | +    axes[i].imshow(image) | 
|  | 143 | +    axes[i].set_title(f"Image {i+1}") | 
|  | 144 | +    axes[i].axis('off') | 
|  | 145 | + | 
|  | 146 | +plt.tight_layout() | 
|  | 147 | +plt.show() | 
|  | 148 | +``` | 
|  | 149 | + | 
|  | 150 | +To generate multiple variations of one prompt, use the `num_images_per_prompt` argument. | 
|  | 151 | + | 
|  | 152 | +```py | 
|  | 153 | +import torch | 
|  | 154 | +import matplotlib.pyplot as plt | 
|  | 155 | +from diffusers.utils import load_image | 
|  | 156 | +from diffusers import AutoPipelineForImage2Image | 
|  | 157 | + | 
|  | 158 | +pipeline = AutoPipelineForImage2Image.from_pretrained( | 
|  | 159 | +    "stabilityai/stable-diffusion-xl-base-1.0", | 
|  | 160 | +    torch_dtype=torch.float16 | 
|  | 161 | +).to("cuda") | 
|  | 162 | + | 
|  | 163 | +input_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/detail-prompt.png") | 
|  | 164 | + | 
|  | 165 | +images = pipeline( | 
|  | 166 | +    prompt="pixel-art a cozy coffee shop interior, low-res, blocky, pixel art style, 8-bit graphics", | 
|  | 167 | +    image=input_image, | 
|  | 168 | +    num_images_per_prompt=4 | 
|  | 169 | +).images | 
|  | 170 | + | 
|  | 171 | +fig, axes = plt.subplots(2, 2, figsize=(12, 12)) | 
|  | 172 | +axes = axes.flatten() | 
|  | 173 | + | 
|  | 174 | +for i, image in enumerate(images): | 
|  | 175 | +    axes[i].imshow(image) | 
|  | 176 | +    axes[i].set_title(f"Image {i+1}") | 
|  | 177 | +    axes[i].axis('off') | 
|  | 178 | + | 
|  | 179 | +plt.tight_layout() | 
|  | 180 | +plt.show() | 
|  | 181 | +``` | 
|  | 182 | + | 
|  | 183 | +Combine both approaches to generate different variations of different prompts. | 
|  | 184 | + | 
|  | 185 | +```py | 
|  | 186 | +input_images = [ | 
|  | 187 | +    load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/cat.png"), | 
|  | 188 | +    load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/detail-prompt.png") | 
|  | 189 | +] | 
|  | 190 | + | 
|  | 191 | +prompts = [ | 
|  | 192 | +    "cinematic film still of a cat basking in the sun on a roof in Turkey, highly detailed, high budget hollywood movie, cinemascope, moody, epic, gorgeous, film grain", | 
|  | 193 | +    "pixel-art a cozy coffee shop interior, low-res, blocky, pixel art style, 8-bit graphics" | 
|  | 194 | +] | 
|  | 195 | + | 
|  | 196 | +images = pipeline( | 
|  | 197 | +    prompt=prompts, | 
|  | 198 | +    image=input_images, | 
|  | 199 | +    num_images_per_prompt=2, | 
|  | 200 | +).images | 
|  | 201 | + | 
|  | 202 | +fig, axes = plt.subplots(2, 2, figsize=(12, 12)) | 
|  | 203 | +axes = axes.flatten() | 
|  | 204 | + | 
|  | 205 | +for i, image in enumerate(images): | 
|  | 206 | +    axes[i].imshow(image) | 
|  | 207 | +    axes[i].set_title(f"Image {i+1}") | 
|  | 208 | +    axes[i].axis('off') | 
|  | 209 | + | 
|  | 210 | +plt.tight_layout() | 
|  | 211 | +plt.show() | 
|  | 212 | +``` | 
|  | 213 | + | 
|  | 214 | +</hfoption> | 
|  | 215 | +</hfoptions> | 
|  | 216 | + | 
|  | 217 | +## Deterministic generation | 
|  | 218 | + | 
|  | 219 | +Enable reproducible batch generation by passing a list of [Generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) objects to the pipeline, with each `Generator` tied to a seed so you can reuse it. | 
|  | 220 | + | 
|  | 221 | +Use a list comprehension to iterate over the batch size specified in `range()` and create a unique `Generator` object for each image in the batch, for example `[torch.Generator(device="cuda").manual_seed(i) for i in range(3)]`. | 
|  | 222 | + | 
|  | 223 | +Don't multiply the `Generator` by the batch size as shown below. That creates a list of repeated references to a single `Generator` object, which is then used sequentially for each image in the batch. | 
|  | 224 | + | 
|  | 225 | +```py | 
|  | 226 | +generator = [torch.Generator(device="cuda").manual_seed(0)] * 3 | 
|  | 227 | +``` | 
|  | 228 | + | 
|  | 229 | +Pass the `generator` to the pipeline. | 
|  | 230 | + | 
|  | 231 | +```py | 
|  | 232 | +import torch | 
|  |  | +import matplotlib.pyplot as plt | 
|  | 233 | +from diffusers import DiffusionPipeline | 
|  | 234 | + | 
|  | 235 | +pipeline = DiffusionPipeline.from_pretrained( | 
|  | 236 | +    "stabilityai/stable-diffusion-xl-base-1.0", | 
|  | 237 | +    torch_dtype=torch.float16 | 
|  | 238 | +).to("cuda") | 
|  | 239 | + | 
|  | 240 | +generator = [torch.Generator(device="cuda").manual_seed(i) for i in range(3)] | 
|  | 241 | +prompts = [ | 
|  | 242 | +    "cinematic photo of A beautiful sunset over mountains, 35mm photograph, film, professional, 4k, highly detailed", | 
|  | 243 | +    "cinematic film still of a cat basking in the sun on a roof in Turkey, highly detailed, high budget hollywood movie, cinemascope, moody, epic, gorgeous, film grain", | 
|  | 244 | +    "pixel-art a cozy coffee shop interior, low-res, blocky, pixel art style, 8-bit graphics" | 
|  | 245 | +] | 
|  | 246 | + | 
|  | 247 | +images = pipeline( | 
|  | 248 | +    prompt=prompts, | 
|  | 249 | +    generator=generator | 
|  | 250 | +).images | 
|  | 251 | + | 
|  | 252 | +fig, axes = plt.subplots(2, 2, figsize=(12, 12)) | 
|  | 253 | +axes = axes.flatten() | 
|  | 254 | + | 
|  | 255 | +for i, image in enumerate(images): | 
|  | 256 | +    axes[i].imshow(image) | 
|  | 257 | +    axes[i].set_title(f"Image {i+1}") | 
|  | 258 | +    axes[i].axis('off') | 
|  | 259 | + | 
|  | 260 | +plt.tight_layout() | 
|  | 261 | +plt.show() | 
|  | 262 | +``` | 
|  | 263 | + | 
|  | 264 | +You can use this to iteratively select an image associated with a seed and then improve on it by crafting a more detailed prompt. | 
0 commit comments