File tree Expand file tree Collapse file tree 2 files changed +34
-0
lines changed Expand file tree Collapse file tree 2 files changed +34
-0
lines changed Original file line number Diff line number Diff line change 1+ #ifndef _cuda_compat_h
2+ #define _cuda_compat_h
3+
4+ // atomicAdd for half types, to support CC < 7.x
5+
6+ __device__ __forceinline__ void atomicAdd_half (half * address , half val )
7+ {
8+ unsigned int * address_as_ui = (unsigned int * ) ((char * )address - ((size_t )address & 2 ));
9+ unsigned int old = * address_as_ui ;
10+ unsigned int assumed ;
11+
12+ do
13+ {
14+ assumed = old ;
15+ __half_raw hsum ;
16+ hsum .x = (size_t )address & 2 ? (old >> 16 ) : (old & 0xffff );
17+ half tmpres = __hadd (hsum , val );
18+ hsum = __half_raw (tmpres );
19+ old = (size_t )address & 2 ? (old & 0xffff ) | (hsum .x << 16 ) : (old & 0xffff0000 ) | hsum .x ;
20+ old = atomicCAS (address_as_ui , assumed , old );
21+ }
22+ while (assumed != old );
23+ }
24+
25+ #ifdef __CUDA_ARCH__
26+ #if __CUDA_ARCH__ < 700
27+
28+ __device__ __forceinline__ void atomicAdd (half * address , half val ) { atomicAdd_half (address , val ); }
29+
30+ #endif
31+ #endif
32+
33+ #endif
Original file line number Diff line number Diff line change 22#include " column_remap.h"
33#include " util.h"
44#include " matrix.h"
5+ #include " cuda_compat.h"
56
67// Block size
78
You can’t perform that action at this time.
0 commit comments