Skip to content

Commit 7210c2f

Browse files
Mefiresu authored and GaryOderNichts committed
wiiu/render: Improve SDL_UpdateTexture performance
When possible, use DMA to copy the whole texture data at once; this improves performance by nearly 15x in the best case. In all other cases, use OSBlockMove instead of the base memcpy implementation, for a ~1.5x speedup in the slowest path.
1 parent 07327cc commit 7210c2f

File tree

1 file changed

+30
-4
lines changed

1 file changed

+30
-4
lines changed

src/render/wiiu/SDL_rtexture_wiiu.c

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,10 @@
3333
#include <gx2/mem.h>
3434
#include <gx2r/surface.h>
3535
#include <gx2r/resource.h>
36+
#include <coreinit/cache.h>
37+
#include <coreinit/memory.h>
38+
#include <dmae/mem.h>
39+
#include <dmae/sync.h>
3640

3741
#include <malloc.h>
3842
#include <stdarg.h>
@@ -170,8 +174,10 @@ int WIIU_SDL_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
170174
WIIU_VideoData *videodata = (WIIU_VideoData *) SDL_GetVideoDevice()->driverdata;
171175
Uint32 BytesPerPixel = SDL_BYTESPERPIXEL(texture->format);
172176
size_t length = rect->w * BytesPerPixel;
177+
size_t total_size = length * rect->h;
173178
Uint8 *src = (Uint8 *) pixels, *dst;
174179
int row, dst_pitch;
180+
bool src_aligned, dst_aligned;
175181

176182
if (!videodata->hasForeground) {
177183
return 0;
@@ -180,10 +186,30 @@ int WIIU_SDL_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
180186
/* We write the rules, and we say all textures are streaming */
181187
WIIU_SDL_LockTexture(renderer, texture, rect, (void**)&dst, &dst_pitch);
182188

183-
for (row = 0; row < rect->h; ++row) {
184-
SDL_memcpy(dst, src, length);
185-
src += pitch;
186-
dst += dst_pitch;
189+
if (length == pitch && length == dst_pitch) {
190+
/* DMA requires 8 byte alignment */
191+
src_aligned = ((uintptr_t)src & 7U) == 0;
192+
dst_aligned = ((uintptr_t)dst & 7U) == 0;
193+
/* DMA works best on textures bigger than 5KiB */
194+
if ((total_size > 5120) && (src_aligned && dst_aligned)) {
195+
DCFlushRange(src, total_size);
196+
/* Run a single DMA transfer and wait until transfer is done */
197+
while (!DMAEWaitDone(DMAECopyMem(dst, src, total_size >> 2, DMAE_SWAP_NONE)));
198+
} else {
199+
/* Otherwise, fallback to a single memory copy */
200+
OSBlockMove(dst, src, total_size, true);
201+
}
202+
} else {
203+
/* Flush source first */
204+
DCFlushRange(src, total_size);
205+
for (row = 0; row < rect->h; ++row) {
206+
/* Do not flush per line here */
207+
OSBlockMove(dst, src, length, false);
208+
src += pitch;
209+
dst += dst_pitch;
210+
}
211+
/* Now that we're done, we can flush the entire destination at once */
212+
DCFlushRange(dst, dst_pitch * rect->h);
187213
}
188214

189215
WIIU_SDL_UnlockTexture(renderer, texture);

0 commit comments

Comments
 (0)