musl - Re: mips n64 porting review

Follow @Openwall on Twitter for new release announcements and other news
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20160223033233.GM9349@brightrain.aerifal.cx>
Date: Mon, 22 Feb 2016 22:32:33 -0500
From: Rich Felker <dalias@...c.org>
To: musl@...ts.openwall.com
Subject: Re: mips n64 porting review

On Mon, Feb 22, 2016 at 10:06:02AM +0000, Mahesh Bodapati wrote:
> Hi Rich,
> I have attached the patch which has all the MIPS n64 porting work. I
> have created mipsn64port_review remote branch on GitHub and please
> have a look at
> https://github.com/MaheshBodapati/musl/commits/mipsn64port_review
> which has the broken down patches and the base revision on which we
> have prepared patch is d150764
> 
> We have tested on both little endian and big endian, here are the FAILS
> 
> Musl mips n64 libc testing on EL :
> 
> Fails: 17
> FAIL ./src/api/main.exe [status 1]
> FAIL ./src/functional/wcstol-static.exe [status 1]
> FAIL ./src/functional/wcstol.exe [status 1]
> FAIL ./src/math/acosh.exe [status 1]
> FAIL ./src/math/asinh.exe [status 1]
> FAIL ./src/math/j0.exe [status 1]
> FAIL ./src/math/jn.exe [status 1]
> FAIL ./src/math/jnf.exe [status 1]
> FAIL ./src/math/lgamma.exe [status 1]
> FAIL ./src/math/lgamma_r.exe [status 1]
> FAIL ./src/math/lgammaf.exe [status 1]
> FAIL ./src/math/lgammaf_r.exe [status 1]
> FAIL ./src/math/sinh.exe [status 1]
> FAIL ./src/math/tgamma.exe [status 1]
> FAIL ./src/math/y0.exe [status 1]
> FAIL ./src/math/y0f.exe [status 1]
> FAIL ./src/math/ynf.exe [status 1]
> 
> Musl mips n64 libc testing on EB :
> Fails:17
> FAIL ./src/api/main.exe [status 1]
> FAIL ./src/functional/wcstol-static.exe [status 1]
> FAIL ./src/functional/wcstol.exe [status 1]
> FAIL ./src/math/acosh.exe [status 1]
> FAIL ./src/math/asinh.exe [status 1]
> FAIL ./src/math/j0.exe [status 1]
> FAIL ./src/math/jn.exe [status 1]
> FAIL ./src/math/jnf.exe [status 1]
> FAIL ./src/math/lgamma.exe [status 1]
> FAIL ./src/math/lgamma_r.exe [status 1]
> FAIL ./src/math/lgammaf.exe [status 1]
> FAIL ./src/math/lgammaf_r.exe [status 1]
> FAIL ./src/math/sinh.exe [status 1]
> FAIL ./src/math/tgamma.exe [status 1]
> FAIL ./src/math/y0.exe [status 1]
> FAIL ./src/math/y0f.exe [status 1]
> FAIL ./src/math/ynf.exe [status 1]

These look fairly expected.

> diff --git a/arch/mips/syscall_arch.h b/arch/mips/syscall_arch.h
               ^^^^^^^^^

This is changing the existing mips port (not mips64)...

> index 39c0ea3..8097180 100644
> --- a/arch/mips/syscall_arch.h
> +++ b/arch/mips/syscall_arch.h
> @@ -54,15 +54,16 @@ static inline long __syscall2(long n, long a, long b)
>  	register long r5 __asm__("$5") = b;
>  	register long r7 __asm__("$7");
>  	register long r2 __asm__("$2");
> +	long ret;
>  	__asm__ __volatile__ (
>  		"addu $2,$0,%2 ; syscall"
>  		: "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7),
>  		  "r"(r4), "r"(r5)
>  		: "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
>  		  "$14", "$15", "$24", "$25", "hi", "lo", "memory");
> -	if (r7) return -r2;
> -	long ret = r2;
> -	if (n == SYS_stat64 || n == SYS_fstat64 || n == SYS_lstat64) __stat_fix(b);
> +	ret = r7 ? -r2 : r2;
> +	if (n == SYS_stat64 || n == SYS_fstat64 || n == SYS_lstat64)
> +		__stat_fix (b);
>  	return ret;
>  }
>  
> @@ -79,10 +80,7 @@ static inline long __syscall3(long n, long a, long b, long c)
>  		  "r"(r4), "r"(r5), "r"(r6)
>  		: "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
>  		  "$14", "$15", "$24", "$25", "hi", "lo", "memory");
> -	if (r7) return -r2;
> -	long ret = r2;
> -	if (n == SYS_stat64 || n == SYS_fstat64 || n == SYS_lstat64) __stat_fix(b);
> -	return ret;
> +	return r7 ? -r2 : r2;
>  }

...and significantly breaking it.

> diff --git a/arch/mips64/atomic_arch.h b/arch/mips64/atomic_arch.h
> new file mode 100644
> index 0000000..d90442d
> --- /dev/null
> +++ b/arch/mips64/atomic_arch.h
> @@ -0,0 +1,221 @@
> +#define a_cas a_cas
> +static inline int a_cas(volatile int *p, int t, int s)
> +{
> +	int dummy;
> +	__asm__ __volatile__(
> +		".set push\n"
> +		".set noreorder\n"
> +		"	sync\n"
> +		"1:	ll %0, %2\n"
> +		"	bne %0, %3, 1f\n"
> +		"	addu %1, %4, $0\n"
> +		"	sc %1, %2\n"
> +		"	beq %1, $0, 1b\n"
> +		"	nop\n"
> +		"	sync\n"
> +		"1:	\n"
> +		".set pop\n"
> +		: "=&r"(t), "=&r"(dummy), "+m"(*p) : "r"(t), "r"(s) : "memory" );
> +        return t;
> +}
> [...]

As noted by Bobby Bingham, you can just define the new ll/sc fragments
rather than big asm blocks like were needed in old musl versions. I'll
try to get the right infrastructure for the 64-bit pointer variant
upstream right away so you can use it without needing more duplication
in the mips64 port; right now it's just in the aarch64 dir.

> diff --git a/arch/mips64/bits/float.h b/arch/mips64/bits/float.h
> new file mode 100644
> index 0000000..719c790
> --- /dev/null
> +++ b/arch/mips64/bits/float.h
> @@ -0,0 +1,16 @@
> +#define FLT_EVAL_METHOD 0
> +
> +#define LDBL_TRUE_MIN 6.47517511943802511092443895822764655e-4966L
> +#define LDBL_MIN 3.36210314311209350626267781732175260e-4932L
> +#define LDBL_MAX 1.18973149535723176508575932662800702e+4932L
> +#define LDBL_EPSILON 1.92592994438723585305597794258492732e-34L
> +
> +#define LDBL_MANT_DIG 113
> +#define LDBL_MIN_EXP (-16381)
> +#define LDBL_MAX_EXP 16384
> +
> +#define LDBL_DIG 33
> +#define LDBL_MIN_10_EXP (-4931)
> +#define LDBL_MAX_10_EXP 4932
> +
> +#define DECIMAL_DIG 36

Is your mips64 port using IEEE quad? These values look like they're
matched to IEEE quad, but I couldn't find clear answers on whether
mips64 is using IEEE quad or double-double. The latter cannot be
supported, so if that's what it's using, we'd need to either find a
way to configure the compiler for quad or for 64-bit long double.

> diff --git a/arch/mips64/bits/sem.h b/arch/mips64/bits/sem.h
> new file mode 100644
> index 0000000..1c05a22
> --- /dev/null
> +++ b/arch/mips64/bits/sem.h
> @@ -0,0 +1,8 @@
> +struct semid_ds {
> +	struct ipc_perm sem_perm;
> +	time_t sem_otime;
> +	time_t sem_ctime;
> +	unsigned long sem_nsems;
> +	unsigned long  __unused3;
> +	unsigned long __unused4;
> +};

sem_nsems has to have type unsigned short (this is a POSIX
requirement), with appropriate padding to match the kernel's wrong
definition. See how it's done on other archs including 32-bit mips.
The padding will need to be endian-dependent.

> diff --git a/arch/mips64/pthread_arch.h b/arch/mips64/pthread_arch.h
> new file mode 100644
> index 0000000..b42edbe
> --- /dev/null
> +++ b/arch/mips64/pthread_arch.h
> @@ -0,0 +1,18 @@
> +static inline struct pthread *__pthread_self()
> +{
> +#ifdef __clang__
> +	char *tp;
> +	__asm__ __volatile__ (".word 0x7c03e83b ; move %0, $3" : "=r" (tp) : : "$3" );
> +#else
> +	register char *tp __asm__("$3");
> +	__asm__ __volatile__ (".word 0x7c03e83b" : "=r" (tp) );
> +#endif
> +	return (pthread_t)(tp - 0x7000 - sizeof(struct pthread));
> +}

Not critical, but it would be nice to know if clang still needs this
pessimization or if there's a way to make it use the right register
directly.

> diff --git a/arch/mips64/syscall_arch.h b/arch/mips64/syscall_arch.h
> new file mode 100644
> index 0000000..380b3fb
> --- /dev/null
> +++ b/arch/mips64/syscall_arch.h
> @@ -0,0 +1,205 @@
> +#define __SYSCALL_LL_E(x) \
> +((union { long long ll; long l[2]; }){ .ll = x }).l[0], \
> +((union { long long ll; long l[2]; }){ .ll = x }).l[1]
> +#define __SYSCALL_LL_O(x) 0, __SYSCALL_LL_E((x))

As Bobby Bingham noted these should both just be defined as (x) for
64-bit archs.

> +#define SYSCALL_RLIM_INFINITY (-1UL/2)

Is this right for mips64?

> +#include <sys/stat.h>
> +struct kernel_stat {
> +    unsigned int st_dev;
> +    unsigned int __pad1[3];
> +    unsigned long long st_ino;
> +    unsigned int st_mode;
> +    unsigned int st_nlink;
> +    int st_uid;
> +    int st_gid;
> +    unsigned int st_rdev;
> +    unsigned int __pad2[3];
> +    long long st_size;
> +    unsigned int st_atime_sec;
> +    unsigned int st_atime_nsec;
> +    unsigned int st_mtime_sec;
> +    unsigned int st_mtime_nsec;
> +    unsigned int st_ctime_sec;
> +    unsigned int st_ctime_nsec;
> +    unsigned int st_blksize;
> +    unsigned int __pad3;
> +    unsigned long long st_blocks;
> +};
> +
> +static void __stat_fix(struct kernel_stat *kst, struct stat *st)
> +{
> +	extern void *memset(void *s, int c, size_t n);
> +
> +	st->st_dev = kst->st_dev;
> +	memset (&st->st_pad1, 0, sizeof (st->st_pad1));
> +	st->st_ino = kst->st_ino;
> +	st->st_mode = kst->st_mode;
> +	st->st_nlink = kst->st_nlink;
> +	st->st_uid = kst->st_uid;
> +	st->st_gid = kst->st_gid;
> +	st->st_rdev = kst->st_rdev;
> +	memset (&st->st_pad2, 0, sizeof (st->st_pad2));
> +	st->st_size = kst->st_size;
> +	st->st_pad3 = 0;
> +	st->st_atim.tv_sec = kst->st_atime_sec;
> +	st->st_atim.tv_nsec = kst->st_atime_nsec;
> +	st->st_mtim.tv_sec = kst->st_mtime_sec;
> +	st->st_mtim.tv_nsec = kst->st_mtime_nsec;
> +	st->st_ctim.tv_sec = kst->st_ctime_sec;
> +	st->st_ctim.tv_nsec = kst->st_ctime_nsec;
> +	st->st_blksize = kst->st_blksize;
> +	st->st_blocks = kst->st_blocks;
> +	memset (&st->st_pad5, 0, sizeof (st->st_pad5));
> +	return;
> +}

You've made this a lot more complicated than it needs to be. Just
copying the __stat_fix code from 32-bit mips should work fine. The
only fixup that needs to be done is for st_dev and st_rdev and it can
be done in-place.

> +
> +#ifndef __clang__
> +
> +static inline long __syscall0(long n)
> +{
> +	register long r7 __asm__("$7");
> +	register long r2 __asm__("$2");
> +	__asm__ __volatile__ (
> +		"daddu $2,$0,%2 ; syscall"
> +		: "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7)
> +		: "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
> +		  "$14", "$15", "$24", "$25", "hi", "lo", "memory");
> +	return r7 ? -r2 : r2;
> +}
> +
> +static inline long __syscall1(long n, long a)
> +{
> +	register long r4 __asm__("$4") = a;
> +	register long r7 __asm__("$7");
> +	register long r2 __asm__("$2");
> +	__asm__ __volatile__ (
> +		"daddu $2,$0,%2 ; syscall"
> +		: "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7),
> +		  "r"(r4)
> +		: "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
> +		  "$14", "$15", "$24", "$25", "hi", "lo", "memory");
> +	return r7 ? -r2 : r2;
> +}
> +
> +static inline long __syscall2(long n, long a, long b)
> +{
> +	struct kernel_stat kst = {0,};
> +	long ret;
> +	register long r4 __asm__("$4");
> +	register long r5 __asm__("$5");
> +	register long r7 __asm__("$7");
> +	register long r2 __asm__("$2");
> +
> +	r5 = b;
> +	if (n == SYS_stat || n == SYS_fstat || n == SYS_lstat)
> +		r5 = (long) &kst;
> +
> +	r4 = a;
> +	__asm__ __volatile__ (
> +		"daddu $2,$0,%2 ; syscall"
> +		: "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7),
> +		  "r"(r4), "r"(r5)
> +		: "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
> +		  "$14", "$15", "$24", "$25", "hi", "lo", "memory");
> +
> +	ret = r7 ? -r2 : r2;
> +	if (n == SYS_stat || n == SYS_fstat || n == SYS_lstat) 
> +		__stat_fix(&kst, (struct stat *)b);
> +
> +	return ret;
> +}

See the logic on 32-bit mips. There should be an early return without
performing __stat_fix in the error case (if r7 is nonzero). Otherwise
you're performing fixups on uninitialized data.

> diff --git a/ldso/dynlink.c b/ldso/dynlink.c
> index 87f3b7f..2355d74 100644
> --- a/ldso/dynlink.c
> +++ b/ldso/dynlink.c
> @@ -325,8 +325,8 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
>  	for (; rel_size; rel+=stride, rel_size-=stride*sizeof(size_t)) {
>  		if (skip_relative && IS_RELATIVE(rel[1], dso->syms)) continue;
>  		type = R_TYPE(rel[1]);
> -		if (type == REL_NONE) continue;
>  		sym_index = R_SYM(rel[1]);
> +		if (type == REL_NONE) continue;

This looks gratuitous and probably just leftover from old things you tried.

> @@ -1134,7 +1134,7 @@ static void do_mips_relocs(struct dso *p, size_t *got)
>  	Sym *sym = p->syms + j;
>  	rel[0] = (unsigned char *)got - base;
>  	for (i-=j; i; i--, sym++, rel[0]+=sizeof(size_t)) {
> -		rel[1] = sym-p->syms << 8 | R_MIPS_JUMP_SLOT;
> +		rel[1] = R_INFO(sym-p->syms, R_MIPS_JUMP_SLOT);
>  		do_relocs(p, rel, sizeof rel, 2);
>  	}
>  }

Looks ok.

> @@ -1365,7 +1365,7 @@ void __dls2(unsigned char *base, size_t *sp)
>  	size_t symbolic_rel_cnt = 0;
>  	apply_addends_to = rel;
>  	for (; rel_size; rel+=2, rel_size-=2*sizeof(size_t))
> -		if (!IS_RELATIVE(rel[1], ldso.syms)) symbolic_rel_cnt++;
> +	if (!IS_RELATIVE(rel[1], ldso.syms)) symbolic_rel_cnt++;

This looks like a non-functional change, but wrong (misleading
indention) and probably unintentional.

> diff --git a/src/internal/dynlink.h b/src/internal/dynlink.h
> index 48890b2..dcc1bde 100644
> --- a/src/internal/dynlink.h
> +++ b/src/internal/dynlink.h
> @@ -11,14 +11,49 @@ typedef Elf32_Phdr Phdr;
>  typedef Elf32_Sym Sym;
>  #define R_TYPE(x) ((x)&255)
>  #define R_SYM(x) ((x)>>8)
> +#define R_INFO	ELF32_R_INFO
>  #else
>  typedef Elf64_Ehdr Ehdr;
>  typedef Elf64_Phdr Phdr;
>  typedef Elf64_Sym Sym;
>  #define R_TYPE(x) ((x)&0x7fffffff)
>  #define R_SYM(x) ((x)>>32)
> +#define R_INFO	ELF64_R_INFO
> +#define ELF64 1
>  #endif

I don't think ELF64 is used anywhere so it can be removed.

> +#ifdef __mips64
> +#undef R_TYPE
> +#undef R_SYM
> +#undef R_INFO
> +#define R_TYPE(INFO) ({										\
> +	uint64_t r_info = (INFO);								\
> +	uint32_t *type, r_type;									\
> +	type = (((uint32_t*) &r_info) + 1);						\
> +	r_type = (uint32_t) *(((unsigned char*) type) + 3) |	\
> +	(uint32_t) *(((unsigned char*) type) + 2) << 8 |		\
> +	(uint32_t) *(((unsigned char*) type) + 1) << 16;		\
> +	r_type;													\
> +})
> +
> +#define R_SYM(INFO) ({						\
> +	uint64_t r_info = (INFO);				\
> +	uint32_t symidx;						\
> +	symidx = *(((uint32_t*) &r_info) + 0);	\
> +	symidx;									\
> +})
> +
> +#define R_INFO(SYM,TYPE1) ({					\
> +	uint32_t *pinfo;							\
> +	uint64_t r_info = 0;						\
> +	pinfo = ((uint32_t*) &r_info) + 1;			\
> +	*((uint32_t*) &r_info + 0) = (SYM);			\
> +	*((unsigned char*) pinfo + 3) = (TYPE1);	\
> +	r_info;										\
> +})
> +#endif

At the very least this is buggy (aliasing violations) and gratuitously
ugly, and might be wrong for little-endian. Have you tested it on
little?

This code above implies that the values are always stored in
big-endian order as (sym<<32 | type). If they're actually stored
native-endian (which I would expect) then the standard Elf64
definitions just work as-is and don't need replacement. If they're
always-big, tweaked versions of the standard ones that swap the byte
order of their inputs or outputs would be all that's needed.

Rich
Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.