|
Message-Id: <CCS2VQVL0E4A.3M54Y72ERQYI@mussels> Date: Tue, 13 Jul 2021 11:22:36 -0300 From: Érico Nogueira <ericonr@...root.org> To: <musl@...ts.openwall.com> Subject: Re: Changes for strcspn(), strspn(), strtok() and strtok_r() On Tue Jul 13, 2021 at 9:02 AM -03, Stefan Kanthak wrote: > <https://git.musl-libc.org/cgit/musl/plain/src/string/strcspn.c> > > #include <string.h> > > #define BITOP(a,b,op) \ > ((a)[(size_t)(b)/(8*sizeof *(a))] op (size_t)1<<((size_t)(b)%(8*sizeof > *(a)))) > > -size_t strcspn(const char *s, const char *c) > +size_t strcspn(const char *restrict s, const char *c) > { > - const char *a = s; > + const char *a; > - size_t byteset[32/sizeof(size_t)]; > + size_t byteset[32/sizeof(size_t)] = { 0 }; > > - if (!c[0] || !c[1]) return __strchrnul(s, *c)-a; > + if (!c[0] || !c[1]) return __strchrnul(a=s, *c)-a; > > - memset(byteset, 0, sizeof byteset); > for (; *c && BITOP(byteset, *(unsigned char *)c, |=); c++); > - for (; *s && !BITOP(byteset, *(unsigned char *)s, &); s++); > - return s-a; > + for (a=s; *a && !BITOP(byteset, *(unsigned char *)a, &); a++); > + return a-s; > } I don't know if it makes sense to give such a hint to the compiler, but doing something like: [...] - size_t byteset[32/sizeof(size_t)]; - if (!c[0] || !c[1]) return __strchrnul(s, *c)-a; + if (!c[0] || !c[1]) return __strchrnul(a=s, *c)-a; - memset(byteset, 0, sizeof byteset); + size_t byteset[32/sizeof(size_t)] = { 0 }; [...] would possibly leave the idea about initialization order clearer, at the expense of not following the code style. > > After this change, musl's favorite compiler (GCC) will generate code > like the following (here for x86-64), just like the code it already > generates for strspn(), where the initialization of byteset[] is NOT > done via memset(): > > strcspn: > ... > xor %eax, %eax > movq %rax, byteset(%rsp) > movq %rax, byteset+8(%rsp) > movq %rax, byteset+16(%rsp) > movq %rax, byteset+24(%rsp) > ... > > <https://git.musl-libc.org/cgit/musl/plain/src/string/strspn.c> > > #include <string.h> > > #define BITOP(a,b,op) \ > ((a)[(size_t)(b)/(8*sizeof *(a))] op (size_t)1<<((size_t)(b)%(8*sizeof > *(a)))) > > -size_t strspn(const char *s, const char *c) > +size_t strspn(const char *restrict s, const char *c) > { > - const char *a = s; > + const char *a; > size_t byteset[32/sizeof(size_t)] = { 0 }; > > if (!c[0]) return 0; > if (!c[1]) { > - for (; *s == *c; s++); > - return s-a; > + for (a=s; *a == *c; a++); > + return a-s; > } > > for (; *c && BITOP(byteset, *(unsigned char *)c, |=); c++); > - for (; *s && BITOP(byteset, *(unsigned char *)s, &); s++); > - return s-a; > + for (a=s; *a && BITOP(byteset, *(unsigned char *)a, &); a++); > + return a-s; > } Does this improve register utilization? It would be nice to explain why moving the assignment and changing the loop variable is an improvement. (Sending full git patches helps with that ;) This question applies to the previous diff as well. > > <https://git.musl-libc.org/cgit/musl/plain/src/string/strtok.c> > > #include <string.h> > > char *strtok(char *restrict s, const char *restrict sep) > { > static char *p; > + return strtok_r(s, sep, &p); > - if (!s && !(s = p)) return NULL; > - s += strspn(s, sep); > - if (!*s) return p = 0; > - p = s + strcspn(s, sep); > - if (*p) *p++ = 0; > - else p = 0; > - return s; > } It's been this way since the initial check-in commit, I wonder if it was intentional. Your way should save some bytes, though :) > > <https://git.musl-libc.org/cgit/musl/plain/src/string/strtok_r.c> > > #include <string.h> > > char *strtok_r(char *restrict s, const char *restrict sep, char > **restrict p) > { > if (!s && !(s = *p)) return NULL; > s += strspn(s, sep); > - if (!*s) return *p = 0; > + if (!*s) return *p = NULL; > *p = s + strcspn(s, sep); > if (**p) *(*p)++ = 0; > - else *p = 0; > + else *p = NULL; > return s; > } AFAIK musl style doesn't like using NULL, so 0 would always be correct... Unless using NULL here is intentional to not confuse with the string's null terminator? > > If you want to go a step further, avoid to build the same byteset twice: > > <https://git.musl-libc.org/cgit/musl/plain/src/string/strtok_r.c> > > #include <string.h> > > char *strtok_r(char *restrict s, const char *restrict sep, char > **restrict p) > { > + size_t byteset[32/sizeof(size_t)] = { 0 }; > + > if (!s && !(s = *p)) return NULL; > + if (!*s) return *p = NULL; > + if (!*sep) return *p = NULL, *s; > - s += strspn(s, sep); > + for (; *c && BITOP(byteset, *(unsigned char *)c, |=); c++); > + for (; *s && BITOP(byteset, *(unsigned char *)s, &); s++); > - if (!*s) return *p = 0; > + if (!*s) return *p = NULL; > - *p = s + strcspn(s, sep); > - if (**p) *(*p)++ = 0; > - else *p = 0; > - return s; > + sep = s; > + for (; *s && !BITOP(byteset, *(unsigned char *)s, &); s++); > + if (*s) *s++ = 0; > + else *s = NULL; > + *p = s; > + return sep; > } > > Stefan
Powered by blists - more mailing lists
Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.