|
Message-ID: <20190923174029.GN9017@brightrain.aerifal.cx> Date: Mon, 23 Sep 2019 13:40:29 -0400 From: Rich Felker <dalias@...c.org> To: musl@...ts.openwall.com Subject: Re: [PATCH] math: optimize lrint on 32bit targets On Sun, Sep 22, 2019 at 10:43:35PM +0200, Szabolcs Nagy wrote: > * Szabolcs Nagy <nsz@...t70.net> [2019-09-21 17:52:35 +0200]: > > this was discussed on irc. > > did more benchmarks, on i486 branches seem better > than setting the sign bit but on arm branch is > worse so i keep the original code, just changed > the code style (asuint macro instead of union). > > From 67990a5c85fc5db55831f9ddddc58317e5b344b6 Mon Sep 17 00:00:00 2001 > From: Szabolcs Nagy <nsz@...t70.net> > Date: Mon, 16 Sep 2019 20:33:11 +0000 > Subject: [PATCH] math: optimize lrint on 32bit targets > > lrint in (LONG_MAX, 1/DBL_EPSILON) and in (-1/DBL_EPSILON, LONG_MIN) > is not trivial: rounding to int may be inexact, but the conversion to > int may overflow and then the inexact flag must not be raised. (the > overflow threshold is rounding mode dependent). > > this matters on 32bit targets (without single instruction lrint or > rint), so the common case (when there is no overflow) is optimized by > inlining the lrint logic, otherwise the old code is kept as a fallback. > > on my laptop an i486 lrint call is asm:10ns, old c:30ns, new c:21ns > on a smaller arm core: old c:71ns, new c:34ns > on a bigger arm core: old c:27ns, new c:19ns > --- > src/math/lrint.c | 28 +++++++++++++++++++++++++++- > 1 file changed, 27 insertions(+), 1 deletion(-) > > diff --git a/src/math/lrint.c b/src/math/lrint.c > index bdca8b7c..ddee7a0d 100644 > --- a/src/math/lrint.c > +++ b/src/math/lrint.c > @@ -1,5 +1,6 @@ > #include <limits.h> > #include <fenv.h> > +#include <math.h> > #include "libm.h" > > /* > @@ -26,7 +27,18 @@ as a double. 
> */ > > #if LONG_MAX < 1U<<53 && defined(FE_INEXACT) > -long lrint(double x) > +#include <float.h> > +#include <stdint.h> > +#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1 > +#define EPS DBL_EPSILON > +#elif FLT_EVAL_METHOD==2 > +#define EPS LDBL_EPSILON > +#endif > +#ifdef __GNUC__ > +/* avoid stack frame in lrint */ > +__attribute__((noinline)) > +#endif > +static long lrint_slow(double x) > { > #pragma STDC FENV_ACCESS ON > int e; > @@ -38,6 +50,20 @@ long lrint(double x) > /* conversion */ > return x; > } > + > +long lrint(double x) > +{ > + uint32_t abstop = asuint64(x)>>32 & 0x7fffffff; > + uint64_t sign = asuint64(x) & (1ULL << 63); > + > + if (abstop < 0x41dfffff) { > + /* |x| < 0x7ffffc00, no overflow */ > + double_t toint = asdouble(asuint64(1/EPS) | sign); > + double_t y = x + toint - toint; > + return (long)y; > + } > + return lrint_slow(x); > +} > #else > long lrint(double x) > { This code should be considerably faster than calling rint on 64-bit archs too, no? I wonder if it should be something like (untested, written inline here): long lrint(double x) { uint32_t abstop = asuint64(x)>>32 & 0x7fffffff; uint64_t sign = asuint64(x) & (1ULL << 63); #if LONG_MAX < 1U<<53 && defined(FE_INEXACT) if (abstop >= 0x41dfffff) return lrint_slow(x); #endif /* |x| < 0x7ffffc00, no overflow */ double_t toint = asdouble(asuint64(1/EPS) | sign); double_t y = x + toint - toint; return (long)y; } Rich
Powered by blists - more mailing lists
Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.