@@ -254,13 +254,6 @@ static XNN_INLINE xnn_simd_f32_t xnn_set1_f32(float v) {
254
254
}
255
255
256
256
// Tail load/store operations.
257
- static XNN_INLINE xnn_simd_f32_t
258
- xnn_load_tail_f32 (const float * input , size_t num_elements ) XNN_OOB_READS {
259
- assert (num_elements > 0 );
260
- assert (num_elements < xnn_simd_size_f32 );
261
- return vld1q_f32 (input );
262
- }
263
-
264
257
// TODO: Use direct load of 1,2 or 3 floats
265
258
// Consider clearing pad values to 0
266
259
static XNN_INLINE xnn_simd_f32_t xnn_load_tail_safe_f32 (const float * input ,
@@ -284,6 +277,17 @@ static XNN_INLINE xnn_simd_f32_t xnn_load_tail_safe_f32(const float* input,
284
277
return vld1q_f32 (padded );
285
278
}
286
279
280
+ static XNN_INLINE xnn_simd_f32_t
281
+ xnn_load_tail_f32 (const float * input , size_t num_elements ) XNN_OOB_READS {
282
+ assert (num_elements > 0 );
283
+ assert (num_elements < xnn_simd_size_f32 );
284
+ #if XNN_COMPILER_HAS_FEATURE (thread_sanitizer )
285
+ return xnn_load_tail_safe_f32 (input , num_elements );
286
+ #else
287
+ return vld1q_f32 (input );
288
+ #endif
289
+ }
290
+
287
291
static XNN_INLINE void xnn_store_tail_f32 (float * output , xnn_simd_f32_t v ,
288
292
size_t num_elements ) {
289
293
assert (num_elements > 0 );
0 commit comments