|
Message-ID: <9216362a-951e-4c56-b7a3-b865fde49aed@loongson.cn> Date: Sat, 11 May 2024 09:06:34 +0800 From: lixing <lixing@...ngson.cn> To: ticat_fp <fanpeng@...ngson.cn>, musl@...ts.openwall.com Cc: huajingyun@...ngson.cn, wanghongliang@...ngson.cn Subject: Re: [PATCH] math: add LoongArch support for common APIs with inline assembly. Hi, Rich. Can you take some time to check these loongarch64 optimizations of the math functions? We've verified this patch on an Alpine system. Thanks. 在 2024/4/23 上午10:26, ticat_fp 写道: > Including: ceil, copysign, fabs, floor, fma, fmax, fmin, llrint, > lrint, rint, sqrt and their f versions. > > --- > src/math/loongarch64/ceil.c | 25 +++++++++++++++++++++++++ > src/math/loongarch64/ceilf.c | 25 +++++++++++++++++++++++++ > src/math/loongarch64/copysign.c | 7 +++++++ > src/math/loongarch64/copysignf.c | 7 +++++++ > src/math/loongarch64/fabs.c | 7 +++++++ > src/math/loongarch64/fabsf.c | 7 +++++++ > src/math/loongarch64/floor.c | 22 ++++++++++++++++++++++ > src/math/loongarch64/floorf.c | 22 ++++++++++++++++++++++ > src/math/loongarch64/fma.c | 7 +++++++ > src/math/loongarch64/fmaf.c | 7 +++++++ > src/math/loongarch64/fmax.c | 7 +++++++ > src/math/loongarch64/fmaxf.c | 7 +++++++ > src/math/loongarch64/fmin.c | 7 +++++++ > src/math/loongarch64/fminf.c | 7 +++++++ > src/math/loongarch64/llrint.c | 17 +++++++++++++++++ > src/math/loongarch64/llrintf.c | 17 +++++++++++++++++ > src/math/loongarch64/lrint.c | 17 +++++++++++++++++ > src/math/loongarch64/lrintf.c | 17 +++++++++++++++++ > src/math/loongarch64/rint.c | 7 +++++++ > src/math/loongarch64/rintf.c | 7 +++++++ > src/math/loongarch64/sqrt.c | 7 +++++++ > src/math/loongarch64/sqrtf.c | 7 +++++++ > 22 files changed, 260 insertions(+) > create mode 100644 src/math/loongarch64/ceil.c > create mode 100644 src/math/loongarch64/ceilf.c > create mode 100644 src/math/loongarch64/copysign.c > create mode 100644 src/math/loongarch64/copysignf.c > create mode 100644 src/math/loongarch64/fabs.c > create 
mode 100644 src/math/loongarch64/fabsf.c > create mode 100644 src/math/loongarch64/floor.c > create mode 100644 src/math/loongarch64/floorf.c > create mode 100644 src/math/loongarch64/fma.c > create mode 100644 src/math/loongarch64/fmaf.c > create mode 100644 src/math/loongarch64/fmax.c > create mode 100644 src/math/loongarch64/fmaxf.c > create mode 100644 src/math/loongarch64/fmin.c > create mode 100644 src/math/loongarch64/fminf.c > create mode 100644 src/math/loongarch64/llrint.c > create mode 100644 src/math/loongarch64/llrintf.c > create mode 100644 src/math/loongarch64/lrint.c > create mode 100644 src/math/loongarch64/lrintf.c > create mode 100644 src/math/loongarch64/rint.c > create mode 100644 src/math/loongarch64/rintf.c > create mode 100644 src/math/loongarch64/sqrt.c > create mode 100644 src/math/loongarch64/sqrtf.c > > diff --git a/src/math/loongarch64/ceil.c b/src/math/loongarch64/ceil.c > new file mode 100644 > index 00000000..95781f4b > --- /dev/null > +++ b/src/math/loongarch64/ceil.c > @@ -0,0 +1,25 @@ > +#include <math.h> > +#include <stdint.h> > + > +double ceil(double x) > +{ > + int32_t old; > + int32_t new; > + int32_t tmp1; > + int32_t tmp2; > + > + __asm__ __volatile__( > + "movfcsr2gr %[orig_old], $r0 \n\t" > + "li.d %[tmp1], 0x200 \n\t" > + "or %[new], %[orig_old], %[tmp1] \n\t" > + "li.d %[tmp2], 0xfffffeff \n\t" > + "and %[new], %[new], %[tmp2] \n\t" > + "movgr2fcsr $r0, %[new] \n\t" > + "frint.d %[result], %[orig_x] \n\t" > + "movgr2fcsr $r0, %[orig_old] \n\t" > + : [result] "+f"(x), [old]"+r"(old), [new]"+r"(new), [tmp1] "+r"(tmp1), [tmp2] "+r"(tmp2) > + : [orig_x] "f"(x), [orig_old]"r"(old), [orig_new]"r"(new), [orig_tmp1] "r"(tmp1), [orig_tmp2] "r"(tmp2) > + :); > + > + return x; > +} > diff --git a/src/math/loongarch64/ceilf.c b/src/math/loongarch64/ceilf.c > new file mode 100644 > index 00000000..03a2d933 > --- /dev/null > +++ b/src/math/loongarch64/ceilf.c > @@ -0,0 +1,25 @@ > +#include <math.h> > +#include <stdint.h> > + > +float 
ceilf(float x) > +{ > + int32_t old; > + int32_t new; > + int32_t tmp1; > + int32_t tmp2; > + > + __asm__ __volatile__( > + "movfcsr2gr %[orig_old], $r0 \n\t" > + "li.d %[tmp1], 0x200 \n\t" > + "or %[new], %[orig_old], %[tmp1] \n\t" > + "li.d %[tmp2], 0xfffffeff \n\t" > + "and %[new], %[new], %[tmp2] \n\t" > + "movgr2fcsr $r0, %[new] \n\t" > + "frint.s %[result], %[orig_x] \n\t" > + "movgr2fcsr $r0, %[orig_old] \n\t" > + : [result] "+f"(x), [old]"+r"(old), [new]"+r"(new), [tmp1] "+r"(tmp1), [tmp2] "+r"(tmp2) > + : [orig_x] "f"(x), [orig_old]"r"(old), [orig_new]"r"(new), [orig_tmp1] "r"(tmp1), [orig_tmp2] "r"(tmp2) > + :); > + > + return x; > +} > diff --git a/src/math/loongarch64/copysign.c b/src/math/loongarch64/copysign.c > new file mode 100644 > index 00000000..9e3b8de3 > --- /dev/null > +++ b/src/math/loongarch64/copysign.c > @@ -0,0 +1,7 @@ > +#include <math.h> > + > +double copysign(double x, double y) > +{ > + __asm__ __volatile__("fcopysign.d %0, %1, %2" : "=f"(x) : "f"(x), "f"(y)); > + return x; > +} > diff --git a/src/math/loongarch64/copysignf.c b/src/math/loongarch64/copysignf.c > new file mode 100644 > index 00000000..98df4254 > --- /dev/null > +++ b/src/math/loongarch64/copysignf.c > @@ -0,0 +1,7 @@ > +#include <math.h> > + > +float copysignf(float x, float y) > +{ > + __asm__ __volatile__("fcopysign.s %0, %1, %2" : "=f"(x) : "f"(x), "f"(y)); > + return x; > +} > diff --git a/src/math/loongarch64/fabs.c b/src/math/loongarch64/fabs.c > new file mode 100644 > index 00000000..3db57fb5 > --- /dev/null > +++ b/src/math/loongarch64/fabs.c > @@ -0,0 +1,7 @@ > +#include <math.h> > + > +double fabs(double x) > +{ > + __asm__ __volatile__("fabs.d %0, %1" : "=f"(x) : "f"(x)); > + return x; > +} > diff --git a/src/math/loongarch64/fabsf.c b/src/math/loongarch64/fabsf.c > new file mode 100644 > index 00000000..e24201c5 > --- /dev/null > +++ b/src/math/loongarch64/fabsf.c > @@ -0,0 +1,7 @@ > +#include <math.h> > + > +float fabsf(float x) > +{ > + __asm__ 
__volatile__("fabs.s %0, %1" : "=f"(x) : "f"(x)); > + return x; > +} > diff --git a/src/math/loongarch64/floor.c b/src/math/loongarch64/floor.c > new file mode 100644 > index 00000000..7aead2a3 > --- /dev/null > +++ b/src/math/loongarch64/floor.c > @@ -0,0 +1,22 @@ > +#include <math.h> > +#include <stdint.h> > + > +double floor(double x) > +{ > + int32_t old; > + int32_t new; > + int32_t tmp1; > + > + __asm__ __volatile__( > + "movfcsr2gr %[old], $r0 \n\t" > + "li.d %[tmp1], 0x300 \n\t" > + "or %[new], %[old], %[tmp1] \n\t" > + "movgr2fcsr $r0, %[new] \n\t" > + "frint.d %[result], %[orig_x] \n\t" > + "movgr2fcsr $r0, %[old] \n\t" > + : [result] "+f"(x), [old]"+r"(old), [tmp1] "+r"(tmp1), [new]"+r"(new) > + : [orig_x] "f"(x), [origin_old] "r"(old), [orig_new] "r"(new), [orig_tmp1] "r"(tmp1) > + :); > + > + return x; > +} > diff --git a/src/math/loongarch64/floorf.c b/src/math/loongarch64/floorf.c > new file mode 100644 > index 00000000..772d15eb > --- /dev/null > +++ b/src/math/loongarch64/floorf.c > @@ -0,0 +1,22 @@ > +#include <math.h> > +#include <stdint.h> > + > +float floorf(float x) > +{ > + int32_t old; > + int32_t new; > + int32_t tmp1; > + > + __asm__ __volatile__( > + "movfcsr2gr %[old], $r0 \n\t" > + "li.d %[tmp1], 0x300 \n\t" > + "or %[new], %[old], %[tmp1] \n\t" > + "movgr2fcsr $r0, %[new] \n\t" > + "frint.s %[result], %[orig_x] \n\t" > + "movgr2fcsr $r0, %[old] \n\t" > + : [result] "+f"(x), [old]"+r"(old), [tmp1] "+r"(tmp1), [new]"+r"(new) > + : [orig_x] "f"(x), [origin_old] "r"(old), [orig_new] "r"(new), [orig_tmp1] "r"(tmp1) > + :); > + > + return x; > +} > diff --git a/src/math/loongarch64/fma.c b/src/math/loongarch64/fma.c > new file mode 100644 > index 00000000..0b6a3f23 > --- /dev/null > +++ b/src/math/loongarch64/fma.c > @@ -0,0 +1,7 @@ > +#include <math.h> > + > +double fma(double x, double y, double z) > +{ > + __asm__ __volatile__("fmadd.d %0, %1, %2, %3" : "=f" (x) : "f"(x) , "f" (y), "f" (z)); > + return x; > +} > diff --git 
a/src/math/loongarch64/fmaf.c b/src/math/loongarch64/fmaf.c > new file mode 100644 > index 00000000..77a8363b > --- /dev/null > +++ b/src/math/loongarch64/fmaf.c > @@ -0,0 +1,7 @@ > +#include <math.h> > + > +float fmaf(float x, float y, float z) > +{ > + __asm__ __volatile__("fmadd.s %0, %1, %2, %3" : "=f" (x) : "f"(x) , "f" (y), "f" (z)); > + return x; > +} > diff --git a/src/math/loongarch64/fmax.c b/src/math/loongarch64/fmax.c > new file mode 100644 > index 00000000..2d091877 > --- /dev/null > +++ b/src/math/loongarch64/fmax.c > @@ -0,0 +1,7 @@ > +#include <math.h> > + > +double fmax(double x, double y) > +{ > + __asm__ __volatile__("fmax.d %0, %1, %2" : "=f"(x) : "f"(x), "f"(y)); > + return x; > +} > diff --git a/src/math/loongarch64/fmaxf.c b/src/math/loongarch64/fmaxf.c > new file mode 100644 > index 00000000..1106d47c > --- /dev/null > +++ b/src/math/loongarch64/fmaxf.c > @@ -0,0 +1,7 @@ > +#include <math.h> > + > +float fmaxf(float x, float y) > +{ > + __asm__ __volatile__("fmax.s %0, %1, %2" : "=f"(x) : "f"(x), "f"(y)); > + return x; > +} > diff --git a/src/math/loongarch64/fmin.c b/src/math/loongarch64/fmin.c > new file mode 100644 > index 00000000..9c44ce87 > --- /dev/null > +++ b/src/math/loongarch64/fmin.c > @@ -0,0 +1,7 @@ > +#include <math.h> > + > +double fmin(double x, double y) > +{ > + __asm__ __volatile__("fmin.d %0, %1, %2" : "=f"(x) : "f"(x), "f"(y)); > + return x; > +} > diff --git a/src/math/loongarch64/fminf.c b/src/math/loongarch64/fminf.c > new file mode 100644 > index 00000000..94a0fa45 > --- /dev/null > +++ b/src/math/loongarch64/fminf.c > @@ -0,0 +1,7 @@ > +#include <math.h> > + > +float fminf(float x, float y) > +{ > + __asm__ __volatile__("fmin.s %0, %1, %2" : "=f"(x) : "f"(x), "f"(y)); > + return x; > +} > diff --git a/src/math/loongarch64/llrint.c b/src/math/loongarch64/llrint.c > new file mode 100644 > index 00000000..766222d3 > --- /dev/null > +++ b/src/math/loongarch64/llrint.c > @@ -0,0 +1,17 @@ > +#include <math.h> > +#include 
<stdint.h> > + > +long long llrint(double x) > +{ > + long long r; > + > + __asm__ __volatile__( > + "frint.d %[x], %[orig_x] \n\t" > + "ftintrz.l.d %[x], %[x] \n\t" > + "movfr2gr.d %[result], %[x] \n\t" > + : [result]"+r"(r), [x]"+f"(x) > + : [orig_x]"f"(x) > + :); > + > + return r; > +} > diff --git a/src/math/loongarch64/llrintf.c b/src/math/loongarch64/llrintf.c > new file mode 100644 > index 00000000..f5b9dd9f > --- /dev/null > +++ b/src/math/loongarch64/llrintf.c > @@ -0,0 +1,17 @@ > +#include <math.h> > +#include <stdint.h> > + > +long long llrintf(float x) > +{ > + long long r; > + > + __asm__ __volatile__( > + "frint.s %[x], %[orig_x] \n\t" > + "ftintrz.w.s %[x], %[x] \n\t" > + "movfr2gr.s %[result], %[x] \n\t" > + : [result]"+r"(r), [x]"+f"(x) > + : [orig_x]"f"(x) > + :); > + > + return r; > +} > diff --git a/src/math/loongarch64/lrint.c b/src/math/loongarch64/lrint.c > new file mode 100644 > index 00000000..d82239d1 > --- /dev/null > +++ b/src/math/loongarch64/lrint.c > @@ -0,0 +1,17 @@ > +#include <math.h> > +#include <stdint.h> > + > +long lrint(double x) > +{ > + long r; > + > + __asm__ __volatile__( > + "frint.d %[x], %[orig_x] \n\t" > + "ftintrz.l.d %[x], %[x] \n\t" > + "movfr2gr.d %[result], %[x] \n\t" > + : [result]"+r"(r), [x]"+f"(x) > + : [orig_x]"f"(x) > + :); > + > + return r; > +} > diff --git a/src/math/loongarch64/lrintf.c b/src/math/loongarch64/lrintf.c > new file mode 100644 > index 00000000..b30872e9 > --- /dev/null > +++ b/src/math/loongarch64/lrintf.c > @@ -0,0 +1,17 @@ > +#include <math.h> > +#include <stdint.h> > + > +long lrintf(float x) > +{ > + long r; > + > + __asm__ __volatile__( > + "frint.s %[x], %[orig_x] \n\t" > + "ftintrz.l.s %[x], %[x] \n\t" > + "movfr2gr.s %[result], %[x] \n\t" > + : [result]"+r"(r), [x]"+f"(x) > + : [orig_x]"f"(x) > + :); > + > + return r; > +} > diff --git a/src/math/loongarch64/rint.c b/src/math/loongarch64/rint.c > new file mode 100644 > index 00000000..862cea8c > --- /dev/null > +++ 
b/src/math/loongarch64/rint.c > @@ -0,0 +1,7 @@ > +#include <math.h> > + > +double rint(double x) > +{ > + __asm__ __volatile__("frint.d %0, %1" : "=f"(x) : "f"(x)); > + return x; > +} > diff --git a/src/math/loongarch64/rintf.c b/src/math/loongarch64/rintf.c > new file mode 100644 > index 00000000..79ac216b > --- /dev/null > +++ b/src/math/loongarch64/rintf.c > @@ -0,0 +1,7 @@ > +#include <math.h> > + > +float rintf(float x) > +{ > + __asm__ __volatile__("frint.s %0, %1" : "=f"(x) : "f"(x)); > + return x; > +} > diff --git a/src/math/loongarch64/sqrt.c b/src/math/loongarch64/sqrt.c > new file mode 100644 > index 00000000..a70e20e9 > --- /dev/null > +++ b/src/math/loongarch64/sqrt.c > @@ -0,0 +1,7 @@ > +#include <math.h> > + > +double sqrt(double x) > +{ > + __asm__ __volatile__("fsqrt.d %1, %0" : "=f"(x) : "f"(x)); > + return x; > +} > diff --git a/src/math/loongarch64/sqrtf.c b/src/math/loongarch64/sqrtf.c > new file mode 100644 > index 00000000..796609b0 > --- /dev/null > +++ b/src/math/loongarch64/sqrtf.c > @@ -0,0 +1,7 @@ > +#include <math.h> > + > +float sqrtf(float x) > +{ > + __asm__ __volatile__("fsqrt.s %1, %0" : "=f"(x) : "f"(x)); > + return x; > +}
Powered by blists - more mailing lists
Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.