3333#include <gx2/mem.h>
3434#include <gx2r/surface.h>
3535#include <gx2r/resource.h>
36+ #include <coreinit/cache.h>
37+ #include <coreinit/memory.h>
38+ #include <dmae/mem.h>
39+ #include <dmae/sync.h>
3640
3741#include <malloc.h>
3842#include <stdarg.h>
@@ -170,8 +174,10 @@ int WIIU_SDL_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
170174 WIIU_VideoData * videodata = (WIIU_VideoData * ) SDL_GetVideoDevice ()-> driverdata ;
171175 Uint32 BytesPerPixel = SDL_BYTESPERPIXEL (texture -> format );
172176 size_t length = rect -> w * BytesPerPixel ;
177+ size_t total_size = length * rect -> h ;
173178 Uint8 * src = (Uint8 * ) pixels , * dst ;
174179 int row , dst_pitch ;
180+ bool src_aligned , dst_aligned ;
175181
176182 if (!videodata -> hasForeground ) {
177183 return 0 ;
@@ -180,10 +186,30 @@ int WIIU_SDL_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
180186 /* We write the rules, and we say all textures are streaming */
181187 WIIU_SDL_LockTexture (renderer , texture , rect , (void * * )& dst , & dst_pitch );
182188
183- for (row = 0 ; row < rect -> h ; ++ row ) {
184- SDL_memcpy (dst , src , length );
185- src += pitch ;
186- dst += dst_pitch ;
189+ if (length == pitch && length == dst_pitch ) {
190+ /* DMA requires 8 byte alignment */
191+ src_aligned = ((uintptr_t )src & 7U ) == 0 ;
192+ dst_aligned = ((uintptr_t )dst & 7U ) == 0 ;
193+ /* DMA works best on textures bigger than 5KiB */
194+ if ((total_size > 5120 ) && (src_aligned && dst_aligned )) {
195+ DCFlushRange (src , total_size );
196+ /* Run a single DMA transfer and wait until transfer is done */
197+ while (!DMAEWaitDone (DMAECopyMem (dst , src , total_size >> 2 , DMAE_SWAP_NONE )));
198+ } else {
199+ /* Otherwise, fallback to a single memory copy */
200+ OSBlockMove (dst , src , total_size , true);
201+ }
202+ } else {
203+ /* Flush source first */
204+ DCFlushRange (src , total_size );
205+ for (row = 0 ; row < rect -> h ; ++ row ) {
206+ /* Do not flush per line here */
207+ OSBlockMove (dst , src , length , false);
208+ src += pitch ;
209+ dst += dst_pitch ;
210+ }
211+ /* Now that we're done, we can flush the entire destination at once */
212+ DCFlushRange (dst , dst_pitch * rect -> h );
187213 }
188214
189215 WIIU_SDL_UnlockTexture (renderer , texture );
0 commit comments