--- -/src/math/x86_64/remquol.c
+++ +/src/math/x86_64/remquol.c
@@ -16,2 +16,2 @@
-	unsigned fpsr;
-	do __asm__ ("fprem1; fnstsw %%ax" : "+t"(t), "=a"(fpsr) : "u"(y));
+	unsigned short fpsr;
+	do __asm__ ("fprem1; fnstsw %0" : "=a"(fpsr), "+t"(t) : "u"(y));
@@ -23,6 +23,1 @@
-	unsigned char i = fpsr >> 8;
-	i = i>>4 | i<<4;
-	/* i[5:2] is now {b0 b2 ? b1}. Retrieve {0 b2 b1 b0} via
-	 * in-register table lookup. */
-	unsigned qbits = 0x7575313164642020 >> (i & 60);
-	qbits &= 7;
+	unsigned qbits = (fpsr & 0x4300) * 0x910000u >> 29;

--- /dev/null
+++ +/src/math/i386/remquof.c
@@ -0,0 +1,15 @@
+#include <math.h>
+
+float remquof(float x, float y, int *quo)
+{
+	/* returns the x87 fprem1 remainder of x by y; stores a signed 3-bit quotient value in *quo; see ../x86_64/remquol.c */
+	signed char *cx = (void *)&x, *cy = (void *)&y; /* byte views of the arguments, used below to read their sign bytes */
+	__asm__ ("" :: "X"(cx), "X"(cy)); /* NOTE(review): empty asm taking both pointers as inputs; presumably keeps x and y addressable in memory -- confirm against the referenced file */
+	float t = x;
+	unsigned short fpsr; /* receives the 16-bit x87 status word through %ax */
+	do __asm__ ("fprem1; fnstsw %0" : "=a"(fpsr), "+t"(t) : "u"(y)); /* t lives in st(0), y in st(1) */
+	while (fpsr & 0x400); /* status bit 10 (C2) still set: run fprem1 again */
+	unsigned qbits = (fpsr & 0x4300) * 0x910000u >> 29; /* moves status bits 8, 14, 9 (C0, C3, C1) into qbits bits 2, 1, 0 */
+	*quo = (cx[sizeof(x) - 1]^cy[sizeof(y) - 1]) < 0 ? -qbits : qbits; /* negate when the top bytes of x and y have different sign bits */
+	return t;
+}

--- -/src/math/i386/remquof.s
+++ /dev/null
@@ -1,1 +0,0 @@
-# see remquo.s

--- /dev/null
+++ +/src/math/i386/remquol.c
@@ -0,0 +1,17 @@
+#include <math.h>
+
+long double remquol(long double x, long double y, int *quo)
+{
+	/* returns the x87 fprem1 remainder of x by y; stores a signed 3-bit quotient value in *quo; see ../x86_64/remquol.c */
+	signed char *cx = (void *)&x, *cy = (void *)&y; /* byte views of the arguments, used below to read their sign bytes */
+	__asm__ ("" :: "X"(cx), "X"(cy)); /* NOTE(review): empty asm taking both pointers as inputs; presumably keeps x and y addressable in memory -- confirm against the referenced file */
+	long double t = x;
+	unsigned short fpsr; /* receives the 16-bit x87 status word through %ax */
+	do __asm__ ("fprem1; fnstsw %0" : "=a"(fpsr), "+t"(t) : "u"(y)); /* t lives in st(0), y in st(1) */
+	while (fpsr & 0x400); /* status bit 10 (C2) still set: run fprem1 again */
+	unsigned qbits = (fpsr & 0x4300) * 0x910000u >> 29; /* moves status bits 8, 14, 9 (C0, C3, C1) into qbits bits 2, 1, 0 */
+	/* [sizeof(long double) - 1] not usable here due to
+	   GCC's braindead handling of long double alias tbyte */
+	*quo = (cx[9]^cy[9]) < 0 ? -qbits : qbits; /* byte 9 is the byte whose top bit is compared; negate when x and y differ there */
+	return t;
+}

--- -/src/math/i386/remquol.s
+++ /dev/null
@@ -1,1 +0,0 @@
-# see remquo.s

--- /dev/null
+++ +/src/math/i386/remquo.c
@@ -0,0 +1,14 @@
+#include <math.h>
+
+double remquo(double x, double y, int *quo)
+{
+	/* returns the x87 fprem1 remainder of x by y; stores a signed 3-bit quotient value in *quo; see ../x86_64/remquol.c */
+	signed char *cx = (void *)&x, *cy = (void *)&y; /* byte views of the arguments, used below to read their sign bytes */
+	__asm__ ("" :: "X"(cx), "X"(cy)); /* NOTE(review): empty asm taking both pointers as inputs; presumably keeps x and y addressable in memory -- confirm against the referenced file */
+	double t = x;
+	unsigned short fpsr; /* receives the 16-bit x87 status word through %ax */
+	do __asm__ ("fprem1; fnstsw %0" : "=a"(fpsr), "+t"(t) : "u"(y)); /* t lives in st(0), y in st(1) */
+	while (fpsr & 0x400); /* status bit 10 (C2) still set: run fprem1 again */
+	unsigned qbits = (fpsr & 0x4300) * 0x910000u >> 29; /* moves status bits 8, 14, 9 (C0, C3, C1) into qbits bits 2, 1, 0 */
+	*quo = (cx[sizeof(x) - 1]^cy[sizeof(y) - 1]) < 0 ? -qbits : qbits; /* negate when the top bytes of x and y have different sign bits */
+	return t;
+}

--- -/src/math/i386/remquo.s
+++ /dev/null
@@ -1,50 +0,0 @@
-.global remquof
-.type remquof,@function
-remquof:
-	mov 12(%esp),%ecx
-	flds 8(%esp)
-	flds 4(%esp)
-	mov 11(%esp),%dh
-	xor 7(%esp),%dh
-	jmp 1f
-
-.global remquol
-.type remquol,@function
-remquol:
-	mov 28(%esp),%ecx
-	fldt 16(%esp)
-	fldt 4(%esp)
-	mov 25(%esp),%dh
-	xor 13(%esp),%dh
-	jmp 1f
-
-.global remquo
-.type remquo,@function
-remquo:
-	mov 20(%esp),%ecx
-	fldl 12(%esp)
-	fldl 4(%esp)
-	mov 19(%esp),%dh
-	xor 11(%esp),%dh
-1:	fprem1
-	fnstsw %ax
-	sahf
-	jp 1b
-	fstp %st(1)
-	mov %ah,%dl
-	shr %dl
-	and $1,%dl
-	mov %ah,%al
-	shr $5,%al
-	and $2,%al
-	or %al,%dl
-	mov %ah,%al
-	shl $2,%al
-	and $4,%al
-	or %al,%dl
-	test %dh,%dh
-	jns 1f
-	neg %dl
-1:	movsbl %dl,%edx
-	mov %edx,(%ecx)
-	ret