musl - Re: [PATCH] String: expand to word size && refactor || refactor

Follow @Openwall on Twitter for new release announcements and other news

[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]

Message-ID: <20130407092327.GQ30576@port70.net>
Date: Sun, 7 Apr 2013 11:23:27 +0200
From: Szabolcs Nagy <nsz@...t70.net>
To: musl@...ts.openwall.com
Subject: Re: [PATCH] String: expand to word size && refactor ||
 refactor

* Nathan McSween <nwmcsween@...il.com> [2013-04-06 20:38:16 +0000]:
> diff --git a/src/string/memccpy.c b/src/string/memccpy.c
> index b85009c..f032030 100644
> --- a/src/string/memccpy.c
> +++ b/src/string/memccpy.c
> @@ -1,32 +1,36 @@
>  #include <string.h>
> -#include <stdlib.h>
>  #include <stdint.h>
> -#include <limits.h>
>  
> -#define ALIGN (sizeof(size_t)-1)
> -#define ONES ((size_t)-1/UCHAR_MAX)
> -#define HIGHS (ONES * (UCHAR_MAX/2+1))
> -#define HASZERO(x) ((x)-ONES & ~(x) & HIGHS)
> +#define HIGHS(x) ((x) - ((x)*0-1) / 255 * 128)
> +#define LOWS(x) (((x)*0-1) / 255)
> +#define has_char(x, c) ((x) - LOWS(x) & ~(x) & HIGHS(x) ^ LOWS(x) * (c))

the meaning of the new HIGHS is unclear

the original macros are easier to read, HIGHS is probably not
needed and i think size_t can be hardcoded, so

#define ONES ((size_t)-1/255)
#define HASZERO(x) ((x)-ONES & ~(x) & ONES*128)

a has_char can be useful for clarity (but the original code
made sure ones*c is only calculated once) and then i'd use

#define HASCHAR(x,c) HASZERO((x) ^ ONES*(unsigned char)(c))

> -void *memccpy(void *restrict dest, const void *restrict src, int c, size_t n)
> +void *memccpy(void *restrict d, const void *restrict s, int c, size_t n)
>  {
> -	unsigned char *d = dest;
> -	const unsigned char *s = src;
> -	size_t *wd, k;
> +	unsigned char *cd = (unsigned char *)d;
> +	const unsigned char *cs = (const unsigned char *)s;
> +	size_t *wd;
>  	const size_t *ws;
>  
>  	c = (unsigned char)c;
> -	if (((uintptr_t)s & ALIGN) == ((uintptr_t)d & ALIGN)) {
> -		for (; ((uintptr_t)s & ALIGN) && n && (*d=*s)!=c; n--, s++, d++);
> -		if ((uintptr_t)s & ALIGN) goto tail;
> -		k = ONES * c;
> -		wd=(void *)d; ws=(const void *)s;
> -		for (; n>=sizeof(size_t) && !HASZERO(*ws^k);
> -		       n-=sizeof(size_t), ws++, wd++) *wd = *ws;
> -		d=(void *)wd; s=(const void *)ws;
> -	}
> -	for (; n && (*d=*s)!=c; n--, s++, d++);
> -tail:
> -	if (*s==c) return d+1;
> -	return 0;
> +
> +	if ((uintptr_t)s % sizeof(size_t) != (uintptr_t)d % sizeof(size_t)
> +	    || n < sizeof(size_t))
> +		goto bytewise;
> +
> +	for (; ; *cd = *cs, cd++, cs++)
> +		if (*cs == c) return cd + 1;

if ((*cd++ = *cs++) == c) return cd; is more idiomatic

with your code
the c at the end is not copied, this is a bug
the n limit is not accounted for, this is a bug
the code below is never reached, this is another bug

> +	for (wd = (size_t *)d, ws = (const size_t *)s
> +	     ; !has_char(*ws, c) && n >= sizeof(size_t)
> +	     ; ws++, wd++, *wd = *ws);
> +
> +	cd = (unsigned char *)wd;
> +	cs = (const unsigned char *)ws;
> +
> +bytewise:
> +	for (; *cs != c; *cd = *cs, cs++, cd++, n--)
> +		if (!n) return NULL;
> +
> +	return cd + 1;
>  }

i think you should test and benchmark before submitting
such code

Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.