Follow @Openwall on Twitter for new release announcements and other news
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20171215044122.GK1627@brightrain.aerifal.cx>
Date: Thu, 14 Dec 2017 23:41:22 -0500
From: Rich Felker <dalias@...c.org>
To: musl@...ts.openwall.com
Subject: Re: [PATCH] remove a_ctz_l from atomic_arch.h

On Mon, Dec 04, 2017 at 06:05:21PM -0800, Andre McCurdy wrote:
> Arch specific code should now define only a_ctz_32 and/or a_ctz_64
> and a_ctz_l will always be defined generically.
> 
> Provide an ARM specific a_ctz_32 helper function for ARM architecture
> versions for which it can be implemented efficiently via the "rbit"
> instruction (ie all Thumb-2 capable versions of ARM v6 and above).
> 
> Provide a more optimal generic a_ctz_32 for targets which support
> a_clz_32 but not a_ctz_32 (e.g. ARMv5) and define generic a_ctz_64 in
> terms of a_ctz_32 to make better use of architure specific a_ctz_32.
> 
> Signed-off-by: Andre McCurdy <armccurdy@...il.com>
> ---
>  arch/arm/atomic_arch.h  | 12 ++++++++++++
>  arch/i386/atomic_arch.h |  6 +++---
>  arch/x32/atomic_arch.h  |  4 ++--
>  src/internal/atomic.h   | 42 +++++++++++++++++++++++-------------------
>  4 files changed, 40 insertions(+), 24 deletions(-)
> 
> diff --git a/arch/arm/atomic_arch.h b/arch/arm/atomic_arch.h
> index 6e2e3b4..62458b4 100644
> --- a/arch/arm/atomic_arch.h
> +++ b/arch/arm/atomic_arch.h
> @@ -91,4 +91,16 @@ static inline int a_clz_32(uint32_t x)
>  	return x;
>  }
>  
> +#if __ARM_ARCH_6T2__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7
> +
> +#define a_ctz_32 a_ctz_32
> +static inline int a_ctz_32(uint32_t x)
> +{
> +	uint32_t xr;
> +	__asm__ ("rbit %0, %1" : "=r"(xr) : "r"(x));
> +	return a_clz_32(xr);
> +}
> +
> +#endif
> +
>  #endif
> diff --git a/arch/i386/atomic_arch.h b/arch/i386/atomic_arch.h
> index 7d2a48a..047fb68 100644
> --- a/arch/i386/atomic_arch.h
> +++ b/arch/i386/atomic_arch.h
> @@ -92,10 +92,10 @@ static inline int a_ctz_64(uint64_t x)
>  	return r;
>  }
>  
> -#define a_ctz_l a_ctz_l
> -static inline int a_ctz_l(unsigned long x)
> +#define a_ctz_32 a_ctz_32
> +static inline int a_ctz_32(uint32_t x)
>  {
> -	long r;
> +	int r;
>  	__asm__( "bsf %1,%0" : "=r"(r) : "r"(x) );
>  	return r;
>  }
> diff --git a/arch/x32/atomic_arch.h b/arch/x32/atomic_arch.h
> index a744c29..918c2d4 100644
> --- a/arch/x32/atomic_arch.h
> +++ b/arch/x32/atomic_arch.h
> @@ -106,8 +106,8 @@ static inline int a_ctz_64(uint64_t x)
>  	return x;
>  }
>  
> -#define a_ctz_l a_ctz_l
> -static inline int a_ctz_l(unsigned long x)
> +#define a_ctz_32 a_ctz_32
> +static inline int a_ctz_32(uint32_t x)
>  {
>  	__asm__( "bsf %1,%0" : "=r"(x) : "r"(x) );
>  	return x;
> diff --git a/src/internal/atomic.h b/src/internal/atomic.h
> index ab473dd..f938879 100644
> --- a/src/internal/atomic.h
> +++ b/src/internal/atomic.h
> @@ -251,6 +251,22 @@ static inline void a_crash()
>  }
>  #endif
>  
> +#ifndef a_ctz_32
> +#define a_ctz_32 a_ctz_32
> +static inline int a_ctz_32(uint32_t x)
> +{
> +#ifdef a_clz_32
> +	return 31-a_clz_32(x&-x);
> +#else
> +	static const char debruijn32[32] = {
> +		0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
> +		31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
> +	};
> +	return debruijn32[(x&-x)*0x076be629 >> 27];
> +#endif
> +}
> +#endif
> +
>  #ifndef a_ctz_64
>  #define a_ctz_64 a_ctz_64
>  static inline int a_ctz_64(uint64_t x)
> @@ -261,22 +277,23 @@ static inline int a_ctz_64(uint64_t x)
>  		63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10,
>  		51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12
>  	};
> -	static const char debruijn32[32] = {
> -		0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
> -		31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
> -	};
>  	if (sizeof(long) < 8) {
>  		uint32_t y = x;
>  		if (!y) {
>  			y = x>>32;
> -			return 32 + debruijn32[(y&-y)*0x076be629 >> 27];
> +			return 32 + a_ctz_32(y);
>  		}
> -		return debruijn32[(y&-y)*0x076be629 >> 27];
> +		return a_ctz_32(y);
>  	}
>  	return debruijn64[(x&-x)*0x022fdd63cc95386dull >> 58];
>  }
>  #endif
>  
> +static inline int a_ctz_l(unsigned long x)
> +{
> +	return (sizeof(long) < 8) ? a_ctz_32(x) : a_ctz_64(x);
> +}
> +
>  #ifndef a_clz_64
>  #define a_clz_64 a_clz_64
>  static inline int a_clz_64(uint64_t x)
> @@ -298,17 +315,4 @@ static inline int a_clz_64(uint64_t x)
>  }
>  #endif
>  
> -#ifndef a_ctz_l
> -#define a_ctz_l a_ctz_l
> -static inline int a_ctz_l(unsigned long x)
> -{
> -	static const char debruijn32[32] = {
> -		0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
> -		31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
> -	};
> -	if (sizeof(long) == 8) return a_ctz_64(x);
> -	return debruijn32[(x&-x)*0x076be629 >> 27];
> -}
> -#endif
> -
>  #endif
> -- 
> 1.9.1

Overall I think this looks good. I want to recheck it again before
committing, and I'd like to separate out the ARM rbit addition from
the refactoring as a separate commit, but I can do that when I merge
unless you really want to do it first.

Rich

Powered by blists - more mailing lists

Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.