|
Message-Id: <8fea6ddefeac5257eccd7d9d7c9597f95ff587a0.1685536608.git.Jens.Gustedt@inria.fr> Date: Wed, 31 May 2023 16:15:48 +0200 From: Jens Gustedt <Jens.Gustedt@...ia.fr> To: musl@...ts.openwall.com Subject: [C23 128 bit 2/4] C23: implement w128 and wf128 support for printf C23 now allows extended integer types wider than `intmax_t` for the case that they are used to implement some fixed-width integer type. The length specifier wN can then be used for `printf` and friends to print the type. GCC and Clang have provided `__int128` types for many architectures (in particular x86_64) for a long time, and adding these types "officially" is a recurrent user request. They are particularly nice to have for bitsets. Implementing w128 (and wf128) is a first step to provide `int128_t` and `uint128_t` natively, regardless of whether there is already a compiler that implements these types. This implementation hopefully only adds a very mild overhead in size and processing time for those architectures where this type is present. The impact is - one extra state array for the 128 bit type (some 60 static bytes or so) - a widening of the local buffer needed to collect digits (512 bytes on the stack) - widening of the static format functions to accept the 128 type (probably a few bytes for some extra load instructions) These functions are then possibly a bit slower, since they use wider instructions and/or combine several instructions. There was already an optimization in place for the 'u' format, because here division and modulo base 10 are needed, which could be a bit costly. We now apply similar tricks for the other integer formats to avoid passing into the 128 bit emulation if that is possible. 
--- src/stdio/vfprintf.c | 113 ++++++++++++++++++++++++++---------------- src/stdio/vfwprintf.c | 45 ++++++++++------- 2 files changed, 97 insertions(+), 61 deletions(-) diff --git a/src/stdio/vfprintf.c b/src/stdio/vfprintf.c index a531a513..aac065fc 100644 --- a/src/stdio/vfprintf.c +++ b/src/stdio/vfprintf.c @@ -1,4 +1,5 @@ #include "stdio_impl.h" +#include "uwide128.h" #include <errno.h> #include <ctype.h> #include <limits.h> @@ -35,10 +36,12 @@ enum { BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE, ZTPRE, JPRE, WPRE, STOP, + WWPRE, PTR, INT, UINT, ULLONG, LONG, ULONG, SHORT, USHORT, CHAR, UCHAR, LLONG, SIZET, IMAX, UMAX, PDIFF, UIPTR, + INT128, UINT128, DBL, LDBL, NOARG, MAXSTATE @@ -114,7 +117,7 @@ static const unsigned char states[]['z'-'A'+1] = { union arg { - uintmax_t i; + uwide128 i; long double f; void *p; }; @@ -122,22 +125,24 @@ union arg static void pop_arg(union arg *arg, int type, va_list *ap) { switch (type) { - case PTR: arg->p = va_arg(*ap, void *); - break; case INT: arg->i = va_arg(*ap, int); - break; case UINT: arg->i = va_arg(*ap, unsigned int); - break; case LONG: arg->i = va_arg(*ap, long); - break; case ULONG: arg->i = va_arg(*ap, unsigned long); - break; case ULLONG: arg->i = va_arg(*ap, unsigned long long); - break; case SHORT: arg->i = (short)va_arg(*ap, int); - break; case USHORT: arg->i = (unsigned short)va_arg(*ap, int); - break; case CHAR: arg->i = (signed char)va_arg(*ap, int); - break; case UCHAR: arg->i = (unsigned char)va_arg(*ap, int); - break; case LLONG: arg->i = va_arg(*ap, long long); - break; case SIZET: arg->i = va_arg(*ap, size_t); - break; case IMAX: arg->i = va_arg(*ap, intmax_t); - break; case UMAX: arg->i = va_arg(*ap, uintmax_t); - break; case PDIFF: arg->i = va_arg(*ap, ptrdiff_t); - break; case UIPTR: arg->i = (uintptr_t)va_arg(*ap, void *); + case PTR: arg->p = va_arg(*ap, void *); + break; case INT: arg->i = __uwide128_i64(va_arg(*ap, int)); + break; case UINT: arg->i = __uwide128_u64(va_arg(*ap, unsigned int)); + 
break; case LONG: arg->i = __uwide128_i64(va_arg(*ap, long)); + break; case ULONG: arg->i = __uwide128_u64(va_arg(*ap, unsigned long)); + break; case ULLONG: arg->i = __uwide128_u64(va_arg(*ap, unsigned long long)); + break; case SHORT: arg->i = __uwide128_i64((short)va_arg(*ap, int)); + break; case USHORT: arg->i = __uwide128_u64((unsigned short)va_arg(*ap, int)); + break; case CHAR: arg->i = __uwide128_i64((signed char)va_arg(*ap, int)); + break; case UCHAR: arg->i = __uwide128_u64((unsigned char)va_arg(*ap, int)); + break; case LLONG: arg->i = __uwide128_i64(va_arg(*ap, long long)); + break; case SIZET: arg->i = __uwide128_u64(va_arg(*ap, size_t)); + break; case IMAX: arg->i = __uwide128_i64(va_arg(*ap, intmax_t)); + break; case UMAX: arg->i = __uwide128_u64(va_arg(*ap, uintmax_t)); + break; case PDIFF: arg->i = __uwide128_i64(va_arg(*ap, ptrdiff_t)); + break; case UIPTR: arg->i = __uwide128_u64((uintptr_t)va_arg(*ap, void *)); + break; case INT128: arg->i = __uwide128_pop(ap); + break; case UINT128: arg->i = __uwide128_pop(ap); break; case DBL: arg->f = va_arg(*ap, double); break; case LDBL: arg->f = va_arg(*ap, long double); } @@ -163,29 +168,51 @@ static const char xdigits[16] = { "0123456789ABCDEF" }; -static char *fmt_x(uintmax_t x, char *s, int lower) +static char *fmt_u(unsigned long long x, char *s) { - for (; x; x>>=4) *--s = xdigits[(x&15)]|lower; + unsigned long y; + for ( ; x>ULONG_MAX; x/=10) *--s = '0' + x%10; + for (y=x; y; y/=10) *--s = '0' + y%10; return s; } -static char *fmt_b(uintmax_t x, char *s) +static char *fmt128_x(uwide128 x, char *s, int lower) { - for (; x; x>>=1) *--s = '0' + (x&1); + uint32_t y; + uint64_t z; + while (x.v64[hi64]) *--s = xdigits[__uwide128_div16(&x)]|lower; + for (z = x.v64[lo64]; z>UINT32_MAX; z>>=4) *--s = xdigits[(z&15)]|lower; + for (y=z; y; y>>=4) *--s = xdigits[(y&15)]|lower; return s; } -static char *fmt_o(uintmax_t x, char *s) +static char *fmt128_b(uwide128 x, char *s) { - for (; x; x>>=3) *--s = '0' + 
(x&7); + uint64_t z; + uint32_t y; + while (x.v64[hi64]) *--s = '0' + __uwide128_div2(&x); + for (z = x.v64[lo64]; z>UINT32_MAX; z>>=1) *--s = '0' + z%2; + for (y=z; y; y>>=1) *--s = '0' + y%2; return s; } -static char *fmt_u(uintmax_t x, char *s) +static char *fmt128_o(uwide128 x, char *s) { - unsigned long y; - for ( ; x>ULONG_MAX; x/=10) *--s = '0' + x%10; - for (y=x; y; y/=10) *--s = '0' + y%10; + uint64_t z; + uint32_t y; + while (x.v64[hi64]) *--s = '0' + __uwide128_div8(&x); + for (z = x.v64[lo64]; z>UINT32_MAX; z>>=3) *--s = '0' + z%8; + for (y=z; y; y>>=3) *--s = '0' + y%8; + return s; +} + +static char *fmt128_u(uwide128 x, char *s) +{ + uint64_t z; + uint32_t y; + while (x.v64[hi64]) *--s = '0' + __uwide128_div10(&x); + for (z = x.v64[lo64]; z>UINT32_MAX; z/=10) *--s = '0' + z%10; + for (y=z; y; y/=10) *--s = '0' + y%10; return s; } @@ -456,7 +483,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg, unsigned st, ps, width=0; int cnt=0, l=0; size_t i; - char buf[sizeof(uintmax_t)*CHAR_BIT+3+LDBL_MANT_DIG/4]; + char buf[sizeof(uwide128)*CHAR_BIT+3+LDBL_MANT_DIG/4]; const char *prefix; int t, pl; wchar_t wc[2], *ws; @@ -498,7 +525,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg, if (isdigit(s[1]) && s[2]=='$') { l10n=1; if (!f) nl_type[s[1]-'0'] = INT, w = 0; - else w = nl_arg[s[1]-'0'].i; + else w = nl_arg[s[1]-'0'].i.v64[lo64]; s+=3; } else if (!l10n) { w = f ? va_arg(*ap, int) : 0; @@ -511,7 +538,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg, if (*s=='.' && s[1]=='*') { if (isdigit(s[2]) && s[3]=='$') { if (!f) nl_type[s[2]-'0'] = INT, p = 0; - else p = nl_arg[s[2]-'0'].i; + else p = nl_arg[s[2]-'0'].i.v64[lo64]; s+=4; } else if (!l10n) { p = f ? va_arg(*ap, int) : 0; @@ -552,6 +579,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg, #else case 64: ps = LLPRE; st = (st == UINT) ? ULLONG : ((st == INT) ? 
LLONG : PTR); break; #endif + case 128: ps = WWPRE; st = (st == UINT) ? UINT128 : ((st == INT) ? INT128 : PTR); break; default: goto inval; } @@ -592,6 +620,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg, case HHPRE: *(unsigned char *)arg.p = cnt; break; case ZTPRE: *(size_t *)arg.p = cnt; break; case JPRE: *(uintmax_t *)arg.p = cnt; break; + case WWPRE: *(uwide128 *)arg.p = __uwide128_i64(cnt); break; } continue; case 'p': @@ -599,39 +628,39 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg, t = 'x'; fl |= ALT_FORM; case 'x': case 'X': - a = fmt_x(arg.i, z, t&32); - if (arg.i && (fl & ALT_FORM)) prefix+=(t>>4), pl=2; + a = fmt128_x(arg.i, z, t&32); + if (!__uwide128_iszero(arg.i) && (fl & ALT_FORM)) prefix+=(t>>4), pl=2; if (0) { case 'b': case 'B': - a = fmt_b(arg.i, z); - if (arg.i && (fl & ALT_FORM)) prefix = (t == 'b' ? "0b" : "0B"), pl=2; + a = fmt128_b(arg.i, z); + if (!__uwide128_iszero(arg.i) && (fl & ALT_FORM)) prefix = (t == 'b' ? 
"0b" : "0B"), pl=2; } if (0) { case 'o': - a = fmt_o(arg.i, z); + a = fmt128_o(arg.i, z); if ((fl&ALT_FORM) && p<z-a+1) p=z-a+1; } if (0) { case 'd': case 'i': pl=1; - if (arg.i>INTMAX_MAX) { - arg.i=-arg.i; + if (arg.i.v64[hi64]>INT64_MAX) { + arg.i=__uwide128_neg(arg.i); } else if (fl & MARK_POS) { prefix++; } else if (fl & PAD_POS) { prefix+=2; } else pl=0; case 'u': - a = fmt_u(arg.i, z); + a = fmt128_u(arg.i, z); } if (xp && p<0) goto overflow; if (xp) fl &= ~ZERO_PAD; - if (!arg.i && !p) { + if (__uwide128_iszero(arg.i) && !p) { a=z; break; } - p = MAX(p, z-a + !arg.i); + p = MAX(p, z-a + __uwide128_iszero(arg.i)); break; case 'c': - *(a=z-(p=1))=arg.i; + *(a=z-(p=1))=arg.i.v64[lo64]; fl &= ~ZERO_PAD; break; case 'm': @@ -644,7 +673,7 @@ static int printf_core(FILE *f, const char *fmt, va_list *ap, union arg *nl_arg, fl &= ~ZERO_PAD; break; case 'C': - wc[0] = arg.i; + wc[0] = arg.i.v64[lo64]; wc[1] = 0; arg.p = wc; p = -1; diff --git a/src/stdio/vfwprintf.c b/src/stdio/vfwprintf.c index 3689c2d5..d510233f 100644 --- a/src/stdio/vfwprintf.c +++ b/src/stdio/vfwprintf.c @@ -1,4 +1,5 @@ #include "stdio_impl.h" +#include "uwide128.h" #include <errno.h> #include <ctype.h> #include <limits.h> @@ -28,10 +29,12 @@ enum { BARE, LPRE, LLPRE, HPRE, HHPRE, BIGLPRE, ZTPRE, JPRE, WPRE, STOP, + WWPRE, PTR, INT, UINT, ULLONG, LONG, ULONG, SHORT, USHORT, CHAR, UCHAR, LLONG, SIZET, IMAX, UMAX, PDIFF, UIPTR, + INT128, UINT128, DBL, LDBL, NOARG, MAXSTATE @@ -107,7 +110,7 @@ static const unsigned char states[]['z'-'A'+1] = { union arg { - uintmax_t i; + uwide128 i; long double f; void *p; }; @@ -116,21 +119,23 @@ static void pop_arg(union arg *arg, int type, va_list *ap) { switch (type) { case PTR: arg->p = va_arg(*ap, void *); - break; case INT: arg->i = va_arg(*ap, int); - break; case UINT: arg->i = va_arg(*ap, unsigned int); - break; case LONG: arg->i = va_arg(*ap, long); - break; case ULONG: arg->i = va_arg(*ap, unsigned long); - break; case ULLONG: arg->i = va_arg(*ap, 
unsigned long long); - break; case SHORT: arg->i = (short)va_arg(*ap, int); - break; case USHORT: arg->i = (unsigned short)va_arg(*ap, int); - break; case CHAR: arg->i = (signed char)va_arg(*ap, int); - break; case UCHAR: arg->i = (unsigned char)va_arg(*ap, int); - break; case LLONG: arg->i = va_arg(*ap, long long); - break; case SIZET: arg->i = va_arg(*ap, size_t); - break; case IMAX: arg->i = va_arg(*ap, intmax_t); - break; case UMAX: arg->i = va_arg(*ap, uintmax_t); - break; case PDIFF: arg->i = va_arg(*ap, ptrdiff_t); - break; case UIPTR: arg->i = (uintptr_t)va_arg(*ap, void *); + break; case INT: arg->i = __uwide128_i64(va_arg(*ap, int)); + break; case UINT: arg->i = __uwide128_u64(va_arg(*ap, unsigned int)); + break; case LONG: arg->i = __uwide128_i64(va_arg(*ap, long)); + break; case ULONG: arg->i = __uwide128_u64(va_arg(*ap, unsigned long)); + break; case ULLONG: arg->i = __uwide128_u64(va_arg(*ap, unsigned long long)); + break; case SHORT: arg->i = __uwide128_i64((short)va_arg(*ap, int)); + break; case USHORT: arg->i = __uwide128_u64((unsigned short)va_arg(*ap, int)); + break; case CHAR: arg->i = __uwide128_i64((signed char)va_arg(*ap, int)); + break; case UCHAR: arg->i = __uwide128_u64((unsigned char)va_arg(*ap, int)); + break; case LLONG: arg->i = __uwide128_i64(va_arg(*ap, long long)); + break; case SIZET: arg->i = __uwide128_u64(va_arg(*ap, size_t)); + break; case IMAX: arg->i = __uwide128_i64(va_arg(*ap, intmax_t)); + break; case UMAX: arg->i = __uwide128_u64(va_arg(*ap, uintmax_t)); + break; case PDIFF: arg->i = __uwide128_i64(va_arg(*ap, ptrdiff_t)); + break; case UIPTR: arg->i = __uwide128_u64((uintptr_t)va_arg(*ap, void *)); + break; case INT128: arg->i = __uwide128_pop(ap); + break; case UINT128: arg->i = __uwide128_pop(ap); break; case DBL: arg->f = va_arg(*ap, double); break; case LDBL: arg->f = va_arg(*ap, long double); } @@ -213,7 +218,7 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_ if (iswdigit(s[1]) && 
s[2]=='$') { l10n=1; nl_type[s[1]-'0'] = INT; - w = nl_arg[s[1]-'0'].i; + w = nl_arg[s[1]-'0'].i.v64[lo64]; s+=3; } else if (!l10n) { w = f ? va_arg(*ap, int) : 0; @@ -226,7 +231,7 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_ if (*s=='.' && s[1]=='*') { if (isdigit(s[2]) && s[3]=='$') { nl_type[s[2]-'0'] = INT; - p = nl_arg[s[2]-'0'].i; + p = nl_arg[s[2]-'0'].i.v64[lo64]; s+=4; } else if (!l10n) { p = f ? va_arg(*ap, int) : 0; @@ -267,6 +272,7 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_ #else case 64: ps = LLPRE; st = (st == UINT) ? ULLONG : ((st == INT) ? LLONG : PTR); break; #endif + case 128: ps = WWPRE; st = (st == UINT) ? UINT128 : ((st == INT) ? INT128 : PTR); break; default: goto inval; } @@ -297,13 +303,14 @@ static int wprintf_core(FILE *f, const wchar_t *fmt, va_list *ap, union arg *nl_ case HHPRE: *(unsigned char *)arg.p = cnt; break; case ZTPRE: *(size_t *)arg.p = cnt; break; case JPRE: *(uintmax_t *)arg.p = cnt; break; + case WWPRE: *(uwide128 *)arg.p = __uwide128_i64(cnt); break; } continue; case 'c': case 'C': if (w<1) w=1; pad(f, w-1, fl); - out(f, &(wchar_t){t=='C' ? arg.i : btowc(arg.i)}, 1); + out(f, &(wchar_t){t=='C' ? arg.i.v64[lo64] : btowc(arg.i.v64[lo64])}, 1); pad(f, w-1, fl^LEFT_ADJ); l = w; continue; -- 2.34.1
Powered by blists - more mailing lists
Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.