44#define _USE_MATH_DEFINES
55#include < math.h>
66
7+ // TODO: make assembly equivalent for Windows x64 (use intrinsic ?)
8+ // ^--- you probably only need to change esp to rsp? -poro
9+
// ASM_MATH_AVAILABLE is (1) when this translation unit can provide the fpu*
// helpers (32-bit x86 MSVC, or any GCC-compatible compiler), and (0) otherwise,
// in which case callers fall back to the plain <math.h> functions.
#if defined(__GNUC__) || (defined(_MSC_VER) && defined(_M_IX86))
#  define ASM_MATH_AVAILABLE (1)
#else /* neither 32-bit x86 MSVC nor GCC/clang */
#  define ASM_MATH_AVAILABLE (0)
#endif
15+
16+ #if ASM_MATH_AVAILABLE == 1
717#if defined(_MSC_VER) && defined(_M_IX86)
8- // TODO: make assembly equivalent for x64 (use intrinsic ?)
918static __declspec (naked) double __vectorcall fpuPow(double x, double y)
1019{
1120 __asm
@@ -53,7 +62,87 @@ static __declspec(naked) double __vectorcall fpuPow(double x, double y)
5362 ret
5463 }
5564}
65+ #elif defined(__GNUC__)
#if defined(__x86_64__) || defined(__i386__)
// GCC/clang counterpart of the MSVC fpuPow: computes x^y on the x87 FPU.
//
// Cases, checked in this order:
//   y == 0  -> returns 1
//   x == 0  -> returns 0
//   else    -> returns 2^(y * log2(x))
//
// NOTE(review): comisd flags an unordered (NaN) comparison as "equal", so a
// NaN y takes the y == 0 path and a NaN x takes the x == 0 path. Negative x
// reaches fyl2x, which is only defined for non-negative operands. Confirm
// callers never rely on libm behaviour for those inputs.
//
// The function is naked: the compiler emits no prologue/epilogue, so the body
// fetches the arguments and returns by itself. GCC only supports *basic* asm
// inside naked functions, hence the single-% register names and the absence
// of an operand/clobber list (xmm2 is caller-saved in both ABIs anyway).
//
// i386 ABI:        arguments on the stack, result in st(0)
// x86_64 SysV ABI: arguments in xmm0/xmm1, result in xmm0
__attribute__((__naked__, __noinline__)) static double fpuPow(double x, double y)
{
	asm volatile (
		// Reserve 8 bytes of scratch stack used to move values between the
		// SSE registers and the x87 stack.
#ifdef __x86_64__
		"subq $8, %rsp\n"
#else
		// Load the stack-passed arguments first: x at 4(%esp), y at 12(%esp).
		"movsd 4(%esp), %xmm0\n"
		"movsd 12(%esp), %xmm1\n"
		"subl $8, %esp\n"
#endif
		// y == 0 ?
		"xorpd %xmm2, %xmm2\n"
		"comisd %xmm2, %xmm1\n"
		"jne 1f\n"

		"fld1\n"
		"jmp 3f\n"

		// x == 0 ?
		"1:\n"
		"comisd %xmm2, %xmm0\n"
		"jne 2f\n"

		"fldz\n"
		"jmp 3f\n"

		// General case: push y, then x, so that st(0) = x and st(1) = y.
		"2:\n"
#ifdef __x86_64__
		"movsd %xmm1, (%rsp)\n"
		"fldl (%rsp)\n"
		"movsd %xmm0, (%rsp)\n"
		"fldl (%rsp)\n"
#else
		"movsd %xmm1, (%esp)\n"
		"fldl (%esp)\n"
		"movsd %xmm0, (%esp)\n"
		"fldl (%esp)\n"
#endif

		"fyl2x\n"                 // st(0) = t = y * log2(x)
		"fld %st(0)\n"            // duplicate t
		"frndint\n"               // st(0) = round(t), st(1) = t
		// AT&T syntax quirk: with %st as source and %st(i) as destination the
		// assembler emits the reversed encoding, so st(1) = st(0) - st(1).
		"fsub %st(0), %st(1)\n"   // st(1) = round(t) - t
		"fxch %st(1)\n"
		"fchs\n"                  // st(0) = t - round(t), the fractional part
		"f2xm1\n"                 // st(0) = 2^frac - 1
		"fld1\n"
		"faddp %st(0), %st(1)\n"  // st(0) = 2^frac
		"fscale\n"                // st(0) = 2^frac * 2^round(t)
		"fstp %st(1)\n"           // drop round(t), keep the result in st(0)

		// Common exit: the result is in st(0).
		"3:\n"
#ifdef __x86_64__
		"fstpl (%rsp)\n"
		"movsd (%rsp), %xmm0\n"
		"addq $8, %rsp\n"
#else
		"addl $8, %esp\n"
#endif
		"ret\n"
	);
}
#else
// Non-x86 GCC/clang targets: no x87 unit, so defer to the compiler's pow
// builtin. __builtin_pow accepts any double exponent (the integer-exponent
// builtin is __builtin_powi), so no special-casing of the exponent is needed.
#define fpuPow(x, y) __builtin_pow((x), (y))
#endif
141+ #else
142+ #error "Unsupported compiler."
143+ #endif /* compiler */
56144
145+ #if defined(_MSC_VER) && defined(_M_IX86)
57146static __declspec (naked) float __vectorcall fpuPowF(float x, float y)
58147{
59148 __asm
@@ -101,7 +190,87 @@ static __declspec(naked) float __vectorcall fpuPowF(float x, float y)
101190 ret
102191 }
103192}
193+ #elif defined(__GNUC__)
#if defined(__x86_64__) || defined(__i386__)
// GCC/clang counterpart of the MSVC fpuPowF: single-precision x^y computed on
// the x87 FPU.
//
// Cases, checked in this order:
//   y == 0  -> returns 1
//   x == 0  -> returns 0
//   else    -> returns 2^(y * log2(x))
//
// NOTE(review): comiss flags an unordered (NaN) comparison as "equal", so a
// NaN y takes the y == 0 path and a NaN x takes the x == 0 path. Negative x
// reaches fyl2x, which is only defined for non-negative operands. Confirm
// callers never rely on libm behaviour for those inputs.
//
// The function is naked: the compiler emits no prologue/epilogue, so the body
// fetches the arguments and returns by itself. GCC only supports *basic* asm
// inside naked functions, hence the single-% register names and the absence
// of an operand/clobber list (xmm2 is caller-saved in both ABIs anyway).
//
// i386 ABI:        arguments on the stack, result in st(0)
// x86_64 SysV ABI: arguments in xmm0/xmm1, result in xmm0
__attribute__((__naked__, __noinline__)) static float fpuPowF(float x, float y)
{
	asm volatile (
		// Reserve 8 bytes of scratch stack used to move values between the
		// SSE registers and the x87 stack.
#ifdef __x86_64__
		"subq $8, %rsp\n"
#else
		// Load the stack-passed arguments first: x at 4(%esp), y at 8(%esp).
		"movss 4(%esp), %xmm0\n"
		"movss 8(%esp), %xmm1\n"
		"subl $8, %esp\n"
#endif
		// y == 0 ?
		"xorps %xmm2, %xmm2\n"
		"comiss %xmm2, %xmm1\n"
		"jne 1f\n"

		"fld1\n"
		"jmp 3f\n"

		// x == 0 ?
		"1:\n"
		"comiss %xmm2, %xmm0\n"
		"jne 2f\n"

		"fldz\n"
		"jmp 3f\n"

		// General case: push y, then x, so that st(0) = x and st(1) = y.
		"2:\n"
#ifdef __x86_64__
		"movss %xmm1, (%rsp)\n"
		"flds (%rsp)\n"
		"movss %xmm0, (%rsp)\n"
		"flds (%rsp)\n"
#else
		"movss %xmm1, (%esp)\n"
		"flds (%esp)\n"
		"movss %xmm0, (%esp)\n"
		"flds (%esp)\n"
#endif

		"fyl2x\n"                 // st(0) = t = y * log2(x)
		"fld %st(0)\n"            // duplicate t
		"frndint\n"               // st(0) = round(t), st(1) = t
		// AT&T syntax quirk: with %st as source and %st(i) as destination the
		// assembler emits the reversed encoding, so st(1) = st(0) - st(1).
		"fsub %st(0), %st(1)\n"   // st(1) = round(t) - t
		"fxch %st(1)\n"
		"fchs\n"                  // st(0) = t - round(t), the fractional part
		"f2xm1\n"                 // st(0) = 2^frac - 1
		"fld1\n"
		"faddp %st(0), %st(1)\n"  // st(0) = 2^frac
		"fscale\n"                // st(0) = 2^frac * 2^round(t)
		"fstp %st(1)\n"           // drop round(t), keep the result in st(0)

		// Common exit: the result is in st(0).
		"3:\n"
#ifdef __x86_64__
		"fstps (%rsp)\n"
		"movss (%rsp), %xmm0\n"
		"addq $8, %rsp\n"
#else
		"addl $8, %esp\n"
#endif
		"ret\n"
	);
}
#else
// Non-x86 GCC/clang targets: no x87 unit, so defer to the compiler's powf
// builtin. __builtin_powf accepts any float exponent (the integer-exponent
// builtin is __builtin_powif), so no special-casing of the exponent is needed.
#define fpuPowF(x, y) __builtin_powf((x), (y))
#endif
269+ #else
270+ #error "Unsupported compiler."
271+ #endif /* compiler */
104272
273+ #if defined(_MSC_VER) && defined(_M_IX86)
105274static __declspec (naked) double __vectorcall fpuCos(double x)
106275{
107276 __asm
@@ -119,7 +288,24 @@ static __declspec(naked) double __vectorcall fpuCos(double x)
119288 ret
120289 }
121290}
122- #endif // defined(_MSC_VER) && defined(_M_IX86)
291+ #elif defined(__GNUC__)
#if !defined(__x86_64__) && !defined(__i386__)
// Not an x86 target: there is no x87 fcos instruction, use the compiler builtin.
#define fpuCos(x) __builtin_cos(x)
#else
// Cosine of x computed with the x87 fcos instruction.
//
// Only the single instruction is written as inline asm, inside an ordinary
// always-inline C++ function; the author notes this helps gcc and clang with
// inlining and LTO (unlike fpuPow/fpuPowF, which are written as pure assembly).
static inline __attribute__((__always_inline__)) double fpuCos(double x)
{
	double angle = x;
	// "+t": the value lives on top of the x87 stack and is replaced in place.
	__asm__ __volatile__("fcos\n" : "+t"(angle));
	return angle;
}
#endif /* x86 vs. other GCC/clang targets */
305+ #else
306+ #error "Unsupported compiler."
307+ #endif /* compiler */
308+ #endif // ASM_MATH_AVAILABLE == 1
123309
124310namespace WaveSabreCore
125311{
@@ -138,7 +324,7 @@ namespace WaveSabreCore
138324 for (int i = 0 ; i < fastCosTabSize + 1 ; i++)
139325 {
140326 double phase = double (i) * ((M_PI * 2 ) / fastCosTabSize);
141- #if defined(_MSC_VER) && defined(_M_IX86)
327+ #if ASM_MATH_AVAILABLE == 1
142328 fastCosTab[i] = fpuCos (phase);
143329#else
144330 fastCosTab[i] = cos (phase);
@@ -153,7 +339,7 @@ namespace WaveSabreCore
153339
154340 double Helpers::Pow (double x, double y)
155341 {
156- #if defined(_MSC_VER) && defined(_M_IX86)
342+ #if ASM_MATH_AVAILABLE == 1
157343 return fpuPow (x, y);
158344#else
159345 return pow (x, y);
@@ -162,7 +348,7 @@ namespace WaveSabreCore
162348
163349 float Helpers::PowF (float x, float y)
164350 {
165- #if defined(_MSC_VER) && defined(_M_IX86)
351+ #if ASM_MATH_AVAILABLE == 1
166352 return fpuPowF (x, y);
167353#else
168354 return powf (x, y);
@@ -365,7 +551,7 @@ namespace WaveSabreCore
365551 {
366552 return (Spread)(int )(param * 2 .0f );
367553 }
368-
554+
369555 float Helpers::SpreadToParam (Spread spread)
370556 {
371557 return (float )spread / 2 .0f ;
0 commit comments