Follow @Openwall on Twitter for new release announcements and other news
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <2C3325A208DA4260A1A0F7B4517D6DFA@H270>
Date: Tue, 10 Dec 2019 17:57:55 +0100
From: "Stefan Kanthak" <stefan.kanthak@...go.de>
To: <musl@...ts.openwall.com>
Subject: More patches for math subtree

Some more optimisations: the current implementations of ceil(), floor()
and trunc() for i386 change the rounding control using fldcw instructions,
which are SLOW; these patches provide faster and smaller branch-free (!)
implementations.

JFTR: I'm NOT subscribed to your mailing list, so CC: me in replies!

--- -/src/math/i386/floor.s
+++ +/src/math/i386/floor.s
@@ -1,67 +1,26 @@
 .global floorf
 .type floorf,@function
 floorf:
         flds 4(%esp)
         jmp 1f
 
 .global floorl
 .type floorl,@function
 floorl:
         fldt 4(%esp)
         jmp 1f
 
 .global floor
 .type floor,@function
 floor:
         fldl 4(%esp)
+1:      fld %st(0)              # st: x, x
+        frndint                 # st: r, x   (r = x rounded in the current rounding mode)
+        fxch %st(1)             # st: x, r
+        fucomip %st(1),%st(0)   # EFLAGS = compare(x, r); pop -> st: r
+        fld1                    # st: 1, r
+        fldz                    # st: 0, 1, r
+        fcmovb %st(1),%st(0)    # c = (x < r, or unordered) ? 1 : 0 -> st: c, 1, r
+        fsubrp %st(0),%st(2)    # r = r - c; gas AT&T swaps fsub/fsubr for %st(i) destinations, so "fsubp" would give c - r
+        fstp %st(0)             # drop the spare 1 -> st: r - c = floor(x)
+        ret
-1:      mov $0x7,%al
-1:      fstcw 4(%esp)
-        mov 5(%esp),%ah
-        mov %al,5(%esp)
-        fldcw 4(%esp)
-        frndint
-        mov %ah,5(%esp)
-        fldcw 4(%esp)
-        ret
-
-.global ceil
-.type ceil,@function
-ceil:
-        fldl 4(%esp)
-        mov $0xb,%al
-        jmp 1b
-
-.global ceilf
-.type ceilf,@function
-ceilf:
-        flds 4(%esp)
-        mov $0xb,%al
-        jmp 1b
-
-.global ceill
-.type ceill,@function
-ceill:
-        fldt 4(%esp)
-        mov $0xb,%al
-        jmp 1b
-
-.global trunc
-.type trunc,@function
-trunc:
-        fldl 4(%esp)
-        mov $0xf,%al
-        jmp 1b
-
-.global truncf
-.type truncf,@function
-truncf:
-        flds 4(%esp)
-        mov $0xf,%al
-        jmp 1b
-
-.global truncl
-.type truncl,@function
-truncl:
-        fldt 4(%esp)
-        mov $0xf,%al
-        jmp 1b

--- -/src/math/i386/ceilf.s
+++ +/src/math/i386/ceilf.s
@@ -1,1 +1,1 @@
-# see floor.s
+# see ceil.s

--- -/src/math/i386/ceill.s
+++ +/src/math/i386/ceill.s
@@ -1,1 +1,1 @@
-# see floor.s
+# see ceil.s

--- -/src/math/i386/ceil.s
+++ +/src/math/i386/ceil.s
@@ -1,1 +1,32 @@
-# see floor.s
+# ceilf/ceill/ceil for i386: the argument is read from 4(%esp) and the result
+# is left in %st(0).  Computes ceil(x) as -floor(-x) without touching the x87
+# control word; going through the negation keeps the sign of a zero result.
+# NOTE(review): fucomip/fcmovcc need a P6-class (i686) CPU; zero signs assume round-to-nearest - confirm.
+.global ceilf
+.type ceilf,@function
+ceilf:
+        flds 4(%esp)
+        jmp 1f
+
+.global ceill
+.type ceill,@function
+ceill:
+        fldt 4(%esp)
+        jmp 1f
+
+.global ceil
+.type ceil,@function
+ceil:
+        fldl 4(%esp)
+1:      fchs                    # st: y            (y = -x)
+        fld %st(0)              # st: y, y
+        frndint                 # st: r, y         (r = y rounded in the current rounding mode)
+        fxch %st(1)             # st: y, r
+        fucomip %st(1),%st(0)   # EFLAGS = compare(y, r); pop -> st: r
+        fld1                    # st: 1, r
+        fldz                    # st: 0, 1, r
+        fcmovb %st(1),%st(0)    # c = (y < r, or unordered) ? 1 : 0 -> st: c, 1, r
+        fsubrp %st(0),%st(2)    # r = r - c; gas AT&T swaps fsub/fsubr for %st(i) destinations, so "fsubp" would give c - r
+        fstp %st(0)             # drop the spare 1 -> st: floor(-x)
+        fchs                    # st: -floor(-x) = ceil(x)
+        ret

--- -/src/math/i386/truncf.s
+++ +/src/math/i386/truncf.s
@@ -1,1 +1,1 @@
-# see floor.s
+# see trunc.s

--- -/src/math/i386/truncl.s
+++ +/src/math/i386/truncl.s
@@ -1,1 +1,1 @@
-# see floor.s
+# see trunc.s

--- -/src/math/i386/trunc.s
+++ +/src/math/i386/trunc.s
@@ -1,1 +1,38 @@
-# see floor.s
+# truncf/truncl/trunc for i386: the argument is read from 4(%esp) and the
+# result is left in %st(0).  Rounds |x| toward zero without touching the x87
+# control word, then reapplies the sign; if x compares equal to zero or is NaN, x itself is returned.
+# NOTE(review): fucomip/fcmovcc need a P6-class (i686) CPU - confirm the baseline.
+.global truncf
+.type truncf,@function
+truncf:
+        flds 4(%esp)
+        jmp 1f
+
+.global truncl
+.type truncl,@function
+truncl:
+        fldt 4(%esp)
+        jmp 1f
+
+.global trunc
+.type trunc,@function
+trunc:
+        fldl 4(%esp)
+1:      fld %st(0)              # st: x, x
+        fabs                    # st: a, x         (a = |x|)
+        fld %st(0)              # st: a, a, x
+        frndint                 # st: r, a, x      (r = a rounded in the current rounding mode)
+        fxch %st(1)             # st: a, r, x
+        fucomip %st(1),%st(0)   # EFLAGS = compare(a, r); pop -> st: r, x
+        fldz                    # st: 0, r, x
+        fld1                    # st: 1, 0, r, x
+        fcmovnb %st(1),%st(0)   # c = (a >= r) ? 0 : 1 -> st: c, 0, r, x
+        fsubrp %st(0),%st(2)    # r = r - c; gas AT&T swaps fsub/fsubr for %st(i) destinations -> st: 0, t, x
+        fucomip %st(2),%st(0)   # EFLAGS = compare(0, x); pop -> st: t, x   (t = trunc|x|)
+        fld %st(0)              # st: t, t, x
+        fchs                    # st: -t, t, x     (default result, used when 0 > x)
+        fcmovb %st(1),%st(0)    # 0 < x            -> result = t
+        fcmove %st(2),%st(0)    # 0 == x or NaN    -> result = x (keeps the sign of -0.0)
+        fstp %st(2)             # st: t, result
+        fstp %st(0)             # st: result
+        ret

Powered by blists - more mailing lists

Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.