math: replace the i386 remquo/remquof/remquol assembly with C + inline asm

The three i386 entry points previously shared one hand-written assembly
body (remquo.s).  Each now gets its own C file that runs the x87 fprem1
instruction in a loop until the status word's C2 flag (0x400) clears,
then derives the low three quotient bits from the status word.

Quotient-bit extraction: after fprem1 the status word holds
Q2 in C0 (bit 8), Q1 in C3 (bit 14) and Q0 in C1 (bit 9).
Multiplying (fpsr & 0x4300) by 0x910000 (2^23 + 2^20 + 2^16) in 32-bit
unsigned arithmetic moves bit 8 -> 31, bit 14 -> 30 and bit 9 -> 29
without any carries, so ">> 29" yields {Q2 Q1 Q0}.  This matches what the
removed assembly computed with its shift/and/or sequence and replaces the
table lookup in x86_64/remquol.c.

The sign of *quo is taken from the XOR of the top (sign) bytes of x and y;
the empty asm with "X" constraints on cx/cy is kept exactly as written.

--- -/src/math/x86_64/remquol.c
+++ +/src/math/x86_64/remquol.c
@@ -16,2 +16,2 @@
-	unsigned fpsr;
-	do __asm__ ("fprem1; fnstsw %%ax" : "+t"(t), "=a"(fpsr) : "u"(y));
+	unsigned short fpsr;
+	do __asm__ ("fprem1; fnstsw %0" : "=a"(fpsr), "+t"(t) : "u"(y));
@@ -23,6 +23,1 @@
-	unsigned char i = fpsr >> 8;
-	i = i>>4 | i<<4;
-	/* i[5:2] is now {b0 b2 ? b1}. Retrieve {0 b2 b1 b0} via
-	 * in-register table lookup. */
-	unsigned qbits = 0x7575313164642020 >> (i & 60);
-	qbits &= 7;
+	unsigned qbits = (fpsr & 0x4300) * 0x910000u >> 29;
--- /dev/null
+++ +/src/math/i386/remquof.c
@@ -0,0 +1,15 @@
+#include <math.h>
+
+float remquof(float x, float y, int *quo)
+{
+	/* see ../x86_64/remquol.c */
+	signed char *cx = (void *)&x, *cy = (void *)&y;
+	__asm__ ("" :: "X"(cx), "X"(cy));
+	float t = x;
+	unsigned short fpsr;
+	do __asm__ ("fprem1; fnstsw %0" : "=a"(fpsr), "+t"(t) : "u"(y));
+	while (fpsr & 0x400);
+	unsigned qbits = (fpsr & 0x4300) * 0x910000u >> 29;
+	*quo = (cx[sizeof(x) - 1]^cy[sizeof(y) - 1]) < 0 ? -qbits : qbits;
+	return t;
+}
--- -/src/math/i386/remquof.s
+++ /dev/null
@@ -1,1 +0,0 @@
-# see remquo.s
--- /dev/null
+++ +/src/math/i386/remquol.c
@@ -0,0 +1,17 @@
+#include <math.h>
+
+long double remquol(long double x, long double y, int *quo)
+{
+	/* see ../x86_64/remquol.c */
+	signed char *cx = (void *)&x, *cy = (void *)&y;
+	__asm__ ("" :: "X"(cx), "X"(cy));
+	long double t = x;
+	unsigned short fpsr;
+	do __asm__ ("fprem1; fnstsw %0" : "=a"(fpsr), "+t"(t) : "u"(y));
+	while (fpsr & 0x400);
+	unsigned qbits = (fpsr & 0x4300) * 0x910000u >> 29;
+	/* [sizeof(long double) - 1] not usable here due to
+	   GCC's braindead handling of long double alias tbyte */
+	*quo = (cx[9]^cy[9]) < 0 ? -qbits : qbits;
+	return t;
+}
--- -/src/math/i386/remquol.s
+++ /dev/null
@@ -1,1 +0,0 @@
-# see remquo.s
--- /dev/null
+++ +/src/math/i386/remquo.c
@@ -0,0 +1,15 @@
+#include <math.h>
+
+double remquo(double x, double y, int *quo)
+{
+	/* see ../x86_64/remquol.c */
+	signed char *cx = (void *)&x, *cy = (void *)&y;
+	__asm__ ("" :: "X"(cx), "X"(cy));
+	double t = x;
+	unsigned short fpsr;
+	do __asm__ ("fprem1; fnstsw %0" : "=a"(fpsr), "+t"(t) : "u"(y));
+	while (fpsr & 0x400);
+	unsigned qbits = (fpsr & 0x4300) * 0x910000u >> 29;
+	*quo = (cx[sizeof(x) - 1]^cy[sizeof(y) - 1]) < 0 ? -qbits : qbits;
+	return t;
+}
--- -/src/math/i386/remquo.s
+++ /dev/null
@@ -1,50 +0,0 @@
-.global remquof
-.type remquof,@function
-remquof:
-	mov 12(%esp),%ecx
-	flds 8(%esp)
-	flds 4(%esp)
-	mov 11(%esp),%dh
-	xor 7(%esp),%dh
-	jmp 1f
-
-.global remquol
-.type remquol,@function
-remquol:
-	mov 28(%esp),%ecx
-	fldt 16(%esp)
-	fldt 4(%esp)
-	mov 25(%esp),%dh
-	xor 13(%esp),%dh
-	jmp 1f
-
-.global remquo
-.type remquo,@function
-remquo:
-	mov 20(%esp),%ecx
-	fldl 12(%esp)
-	fldl 4(%esp)
-	mov 19(%esp),%dh
-	xor 11(%esp),%dh
-1:	fprem1
-	fnstsw %ax
-	sahf
-	jp 1b
-	fstp %st(1)
-	mov %ah,%dl
-	shr %dl
-	and $1,%dl
-	mov %ah,%al
-	shr $5,%al
-	and $2,%al
-	or %al,%dl
-	mov %ah,%al
-	shl $2,%al
-	and $4,%al
-	or %al,%dl
-	test %dh,%dh
-	jns 1f
-	neg %dl
-1:	movsbl %dl,%edx
-	mov %edx,(%ecx)
-	ret