|
Message-ID: <2C3325A208DA4260A1A0F7B4517D6DFA@H270>
Date: Tue, 10 Dec 2019 17:57:55 +0100
From: "Stefan Kanthak" <stefan.kanthak@...go.de>
To: <musl@...ts.openwall.com>
Subject: More patches for math subtree

Some more optimisations: the current implementations of ceil(), floor()
and trunc() for i386 change the rounding control using fldcw instructions,
which are SLOW; these patches provide faster and smaller branch-free (!)
implementations.

JFTR: I'm NOT subscribed to your mailing list, so CC: me in replies!

--- -/src/math/i386/floor.s
+++ +/src/math/i386/floor.s
@@ -1,67 +1,26 @@
 .global floorf
 .type floorf,@function
 floorf:
 	flds 4(%esp)
 	jmp 1f
 
 .global floorl
 .type floorl,@function
 floorl:
 	fldt 4(%esp)
 	jmp 1f
 
 .global floor
 .type floor,@function
 floor:
 	fldl 4(%esp)
+1:	fld %st(0)		# st0 = x, st1 = x
+	frndint			# st0 = r = x rounded to integer in the current rounding mode
+	fxch %st(1)		# st0 = x, st1 = r
+	fucomip %st(1),%st(0)	# flags = compare(x, r), then pop: st0 = r
+	fld1
+	fldz			# st0 = 0, st1 = 1, st2 = r
+	fcmovb %st(1),%st(0)	# st0 = 1 if x < r (r was rounded up; also taken for NaN), else 0
+	fsubrp %st(0),%st(2)	# st2 = r - st0, pop; AT&T "fsubp %st,%st(i)" would give st0 - st(i)
+	fstp %st(0)		# discard the leftover constant 1: st0 = floor(x)
+	ret
-1:	mov $0x7,%al
-1:	fstcw 4(%esp)
-	mov 5(%esp),%ah
-	mov %al,5(%esp)
-	fldcw 4(%esp)
-	frndint
-	mov %ah,5(%esp)
-	fldcw 4(%esp)
-	ret
-
-.global ceil
-.type ceil,@function
-ceil:
-	fldl 4(%esp)
-	mov $0xb,%al
-	jmp 1b
-
-.global ceilf
-.type ceilf,@function
-ceilf:
-	flds 4(%esp)
-	mov $0xb,%al
-	jmp 1b
-
-.global ceill
-.type ceill,@function
-ceill:
-	fldt 4(%esp)
-	mov $0xb,%al
-	jmp 1b
-
-.global trunc
-.type trunc,@function
-trunc:
-	fldl 4(%esp)
-	mov $0xf,%al
-	jmp 1b
-
-.global truncf
-.type truncf,@function
-truncf:
-	flds 4(%esp)
-	mov $0xf,%al
-	jmp 1b
-
-.global truncl
-.type truncl,@function
-truncl:
-	fldt 4(%esp)
-	mov $0xf,%al
-	jmp 1b
--- -/src/math/i386/ceilf.s
+++ +/src/math/i386/ceilf.s
@@ -1,1 +1,1 @@
-# see floor.s
+# see ceil.s
--- -/src/math/i386/ceill.s
+++ +/src/math/i386/ceill.s
@@ -1,1 +1,1 @@
-# see floor.s
+# see ceil.s
--- -/src/math/i386/ceil.s
+++ +/src/math/i386/ceil.s
@@ -1,1 +1,28 @@
-# see floor.s
+.global ceilf
+.type ceilf,@function
+ceilf:
+	flds 4(%esp)
+	jmp 1f
+
+.global ceill
+.type ceill,@function
+ceill:
+	fldt 4(%esp)
+	jmp 1f
+
+.global ceil
+.type ceil,@function
+ceil:
+	fldl 4(%esp)
+1:	fchs			# ceil(x) = -floor(-x); r + 0 or -1 + 1 would yield +0 where -0 is required
+	fld %st(0)		# st0 = -x, st1 = -x
+	frndint			# st0 = r = -x rounded to integer in the current rounding mode
+	fxch %st(1)		# st0 = -x, st1 = r
+	fucomip %st(1),%st(0)	# flags = compare(-x, r), then pop: st0 = r
+	fld1
+	fldz			# st0 = 0, st1 = 1, st2 = r
+	fcmovb %st(1),%st(0)	# st0 = 1 if -x < r (r was rounded up; also taken for NaN), else 0
+	fsubrp %st(0),%st(2)	# st2 = r - st0, pop; the adjusted value must land in st2, not in the constant
+	fstp %st(0)		# discard the leftover constant 1: st0 = floor(-x)
+	fchs			# st0 = -floor(-x) = ceil(x)
+	ret
--- -/src/math/i386/truncf.s
+++ +/src/math/i386/truncf.s
@@ -1,1 +1,1 @@
-# see floor.s
+# see trunc.s
--- -/src/math/i386/truncl.s
+++ +/src/math/i386/truncl.s
@@ -1,1 +1,1 @@
-# see floor.s
+# see trunc.s
--- -/src/math/i386/trunc.s
+++ +/src/math/i386/trunc.s
@@ -1,1 +1,33 @@
-# see floor.s
+.global truncf
+.type truncf,@function
+truncf:
+	flds 4(%esp)
+	jmp 1f
+
+.global truncl
+.type truncl,@function
+truncl:
+	fldt 4(%esp)
+	jmp 1f
+
+.global trunc
+.type trunc,@function
+trunc:
+	fldl 4(%esp)
+1:	fld %st(0)
+	fabs			# st0 = |x|, st1 = x
+	fld %st(0)
+	frndint			# st0 = r = |x| rounded to integer, st1 = |x|, st2 = x
+	fxch %st(1)		# st0 = |x|, st1 = r
+	fucomip %st(1),%st(0)	# flags = compare(|x|, r), then pop: st0 = r, st1 = x
+	fldz
+	fld1			# st0 = 1, st1 = 0, st2 = r, st3 = x
+	fcmovnb %st(1),%st(0)	# st0 = 0 if |x| >= r, else 1 (r was rounded up; kept for NaN)
+	fsubrp %st(0),%st(2)	# st2 = r - st0, pop; AT&T "fsubp %st,%st(i)" would give st0 - st(i)
+	fucomip %st(2),%st(0)	# flags = compare(0, x), then pop: st0 = t = floor(|x|), st1 = x
+	fcmove %st(1),%st(0)	# x == +-0 (or NaN): take x itself so that trunc(-0.0) stays -0.0
+	fst %st(1)		# st1 = t (x is no longer needed)
+	fchs			# st0 = -t
+	fcmovbe %st(1),%st(0)	# 0 <= x (or NaN): result is t, otherwise -t
+	fstp %st(1)		# move the result down and pop: st0 = trunc(x)
+	ret
Powered by blists - more mailing lists
Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.