Follow @Openwall on Twitter for new release announcements and other news
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20160223033233.GM9349@brightrain.aerifal.cx>
Date: Mon, 22 Feb 2016 22:32:33 -0500
From: Rich Felker <dalias@...c.org>
To: musl@...ts.openwall.com
Subject: Re: mips n64 porting review

On Mon, Feb 22, 2016 at 10:06:02AM +0000, Mahesh Bodapati wrote:
> Hi Rich,
> I have attached the patch which has all the MIPS n64 porting work. I
> have created mipsn64port_review remote branch on GitHub and please
> have a look at
> https://github.com/MaheshBodapati/musl/commits/mipsn64port_review
> which has the broken down patches and the base revision on which we
> have prepared patch is d150764
> 
> We have tested on both little endian and big endian, here are the FAILS
> 
> Musl mips n64 libc testing on EL :
> 
> Fails: 17
> FAIL ./src/api/main.exe [status 1]
> FAIL ./src/functional/wcstol-static.exe [status 1]
> FAIL ./src/functional/wcstol.exe [status 1]
> FAIL ./src/math/acosh.exe [status 1]
> FAIL ./src/math/asinh.exe [status 1]
> FAIL ./src/math/j0.exe [status 1]
> FAIL ./src/math/jn.exe [status 1]
> FAIL ./src/math/jnf.exe [status 1]
> FAIL ./src/math/lgamma.exe [status 1]
> FAIL ./src/math/lgamma_r.exe [status 1]
> FAIL ./src/math/lgammaf.exe [status 1]
> FAIL ./src/math/lgammaf_r.exe [status 1]
> FAIL ./src/math/sinh.exe [status 1]
> FAIL ./src/math/tgamma.exe [status 1]
> FAIL ./src/math/y0.exe [status 1]
> FAIL ./src/math/y0f.exe [status 1]
> FAIL ./src/math/ynf.exe [status 1]
> 
> Musl mips n64 libc testing on EB :
> Fails:17
> FAIL ./src/api/main.exe [status 1]
> FAIL ./src/functional/wcstol-static.exe [status 1]
> FAIL ./src/functional/wcstol.exe [status 1]
> FAIL ./src/math/acosh.exe [status 1]
> FAIL ./src/math/asinh.exe [status 1]
> FAIL ./src/math/j0.exe [status 1]
> FAIL ./src/math/jn.exe [status 1]
> FAIL ./src/math/jnf.exe [status 1]
> FAIL ./src/math/lgamma.exe [status 1]
> FAIL ./src/math/lgamma_r.exe [status 1]
> FAIL ./src/math/lgammaf.exe [status 1]
> FAIL ./src/math/lgammaf_r.exe [status 1]
> FAIL ./src/math/sinh.exe [status 1]
> FAIL ./src/math/tgamma.exe [status 1]
> FAIL ./src/math/y0.exe [status 1]
> FAIL ./src/math/y0f.exe [status 1]
> FAIL ./src/math/ynf.exe [status 1]

These look fairly expected.

> diff --git a/arch/mips/syscall_arch.h b/arch/mips/syscall_arch.h
               ^^^^^^^^^

This is changing the existing mips port (not mips64)...

> index 39c0ea3..8097180 100644
> --- a/arch/mips/syscall_arch.h
> +++ b/arch/mips/syscall_arch.h
> @@ -54,15 +54,16 @@ static inline long __syscall2(long n, long a, long b)
>  	register long r5 __asm__("$5") = b;
>  	register long r7 __asm__("$7");
>  	register long r2 __asm__("$2");
> +	long ret;
>  	__asm__ __volatile__ (
>  		"addu $2,$0,%2 ; syscall"
>  		: "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7),
>  		  "r"(r4), "r"(r5)
>  		: "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
>  		  "$14", "$15", "$24", "$25", "hi", "lo", "memory");
> -	if (r7) return -r2;
> -	long ret = r2;
> -	if (n == SYS_stat64 || n == SYS_fstat64 || n == SYS_lstat64) __stat_fix(b);
> +	ret = r7 ? -r2 : r2;
> +	if (n == SYS_stat64 || n == SYS_fstat64 || n == SYS_lstat64)
> +		__stat_fix (b);
>  	return ret;
>  }
>  
> @@ -79,10 +80,7 @@ static inline long __syscall3(long n, long a, long b, long c)
>  		  "r"(r4), "r"(r5), "r"(r6)
>  		: "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
>  		  "$14", "$15", "$24", "$25", "hi", "lo", "memory");
> -	if (r7) return -r2;
> -	long ret = r2;
> -	if (n == SYS_stat64 || n == SYS_fstat64 || n == SYS_lstat64) __stat_fix(b);
> -	return ret;
> +	return r7 ? -r2 : r2;
>  }

...and significantly breaking it.

> diff --git a/arch/mips64/atomic_arch.h b/arch/mips64/atomic_arch.h
> new file mode 100644
> index 0000000..d90442d
> --- /dev/null
> +++ b/arch/mips64/atomic_arch.h
> @@ -0,0 +1,221 @@
> +#define a_cas a_cas
> +static inline int a_cas(volatile int *p, int t, int s)
> +{
> +	int dummy;
> +	__asm__ __volatile__(
> +		".set push\n"
> +		".set noreorder\n"
> +		"	sync\n"
> +		"1:	ll %0, %2\n"
> +		"	bne %0, %3, 1f\n"
> +		"	addu %1, %4, $0\n"
> +		"	sc %1, %2\n"
> +		"	beq %1, $0, 1b\n"
> +		"	nop\n"
> +		"	sync\n"
> +		"1:	\n"
> +		".set pop\n"
> +		: "=&r"(t), "=&r"(dummy), "+m"(*p) : "r"(t), "r"(s) : "memory" );
> +        return t;
> +}
> [...]

As noted by Bobby Bingham, you can just define the new ll/sc fragments
rather than big asm blocks like were needed in old musl versions. I'll
try to get the right infrastructure for the 64-bit pointer variant
upstream right away so you can use it without needing more duplication
in the mips64 port; right now it's just in the aarch64 dir.

> diff --git a/arch/mips64/bits/float.h b/arch/mips64/bits/float.h
> new file mode 100644
> index 0000000..719c790
> --- /dev/null
> +++ b/arch/mips64/bits/float.h
> @@ -0,0 +1,16 @@
> +#define FLT_EVAL_METHOD 0
> +
> +#define LDBL_TRUE_MIN 6.47517511943802511092443895822764655e-4966L
> +#define LDBL_MIN 3.36210314311209350626267781732175260e-4932L
> +#define LDBL_MAX 1.18973149535723176508575932662800702e+4932L
> +#define LDBL_EPSILON 1.92592994438723585305597794258492732e-34L
> +
> +#define LDBL_MANT_DIG 113
> +#define LDBL_MIN_EXP (-16381)
> +#define LDBL_MAX_EXP 16384
> +
> +#define LDBL_DIG 33
> +#define LDBL_MIN_10_EXP (-4931)
> +#define LDBL_MAX_10_EXP 4932
> +
> +#define DECIMAL_DIG 36

Is your mips64 port using IEEE quad? These values look like they're
matched to IEEE quad, but I couldn't find clear answers on whether
mips64 is using IEEE quad or double-double. The latter cannot be
supported, so if that's what it's using, we'd need to either find a
way to configure the compiler for quad or for 64-bit long double.

> diff --git a/arch/mips64/bits/sem.h b/arch/mips64/bits/sem.h
> new file mode 100644
> index 0000000..1c05a22
> --- /dev/null
> +++ b/arch/mips64/bits/sem.h
> @@ -0,0 +1,8 @@
> +struct semid_ds {
> +	struct ipc_perm sem_perm;
> +	time_t sem_otime;
> +	time_t sem_ctime;
> +	unsigned long sem_nsems;
> +	unsigned long  __unused3;
> +	unsigned long __unused4;
> +};

sem_nsems has to have type unsigned short (this is a POSIX
requirement), with appropriate padding to match the kernel's wrong
definition. See how it's done on other archs including 32-bit mips.
The padding will need to be endian-dependent.

> diff --git a/arch/mips64/pthread_arch.h b/arch/mips64/pthread_arch.h
> new file mode 100644
> index 0000000..b42edbe
> --- /dev/null
> +++ b/arch/mips64/pthread_arch.h
> @@ -0,0 +1,18 @@
> +static inline struct pthread *__pthread_self()
> +{
> +#ifdef __clang__
> +	char *tp;
> +	__asm__ __volatile__ (".word 0x7c03e83b ; move %0, $3" : "=r" (tp) : : "$3" );
> +#else
> +	register char *tp __asm__("$3");
> +	__asm__ __volatile__ (".word 0x7c03e83b" : "=r" (tp) );
> +#endif
> +	return (pthread_t)(tp - 0x7000 - sizeof(struct pthread));
> +}

Not critical, but it would be nice to know if clang still needs this
pessimization or if there's a way to make it use the right register
directly.

> diff --git a/arch/mips64/syscall_arch.h b/arch/mips64/syscall_arch.h
> new file mode 100644
> index 0000000..380b3fb
> --- /dev/null
> +++ b/arch/mips64/syscall_arch.h
> @@ -0,0 +1,205 @@
> +#define __SYSCALL_LL_E(x) \
> +((union { long long ll; long l[2]; }){ .ll = x }).l[0], \
> +((union { long long ll; long l[2]; }){ .ll = x }).l[1]
> +#define __SYSCALL_LL_O(x) 0, __SYSCALL_LL_E((x))

As Bobby Bingham noted these should both just be defined as (x) for
64-bit archs.

> +#define SYSCALL_RLIM_INFINITY (-1UL/2)

Is this right for mips64?

> +#include <sys/stat.h>
> +struct kernel_stat {
> +    unsigned int st_dev;
> +    unsigned int __pad1[3];
> +    unsigned long long st_ino;
> +    unsigned int st_mode;
> +    unsigned int st_nlink;
> +    int st_uid;
> +    int st_gid;
> +    unsigned int st_rdev;
> +    unsigned int __pad2[3];
> +    long long st_size;
> +    unsigned int st_atime_sec;
> +    unsigned int st_atime_nsec;
> +    unsigned int st_mtime_sec;
> +    unsigned int st_mtime_nsec;
> +    unsigned int st_ctime_sec;
> +    unsigned int st_ctime_nsec;
> +    unsigned int st_blksize;
> +    unsigned int __pad3;
> +    unsigned long long st_blocks;
> +};
> +
> +static void __stat_fix(struct kernel_stat *kst, struct stat *st)
> +{
> +	extern void *memset(void *s, int c, size_t n);
> +
> +	st->st_dev = kst->st_dev;
> +	memset (&st->st_pad1, 0, sizeof (st->st_pad1));
> +	st->st_ino = kst->st_ino;
> +	st->st_mode = kst->st_mode;
> +	st->st_nlink = kst->st_nlink;
> +	st->st_uid = kst->st_uid;
> +	st->st_gid = kst->st_gid;
> +	st->st_rdev = kst->st_rdev;
> +	memset (&st->st_pad2, 0, sizeof (st->st_pad2));
> +	st->st_size = kst->st_size;
> +	st->st_pad3 = 0;
> +	st->st_atim.tv_sec = kst->st_atime_sec;
> +	st->st_atim.tv_nsec = kst->st_atime_nsec;
> +	st->st_mtim.tv_sec = kst->st_mtime_sec;
> +	st->st_mtim.tv_nsec = kst->st_mtime_nsec;
> +	st->st_ctim.tv_sec = kst->st_ctime_sec;
> +	st->st_ctim.tv_nsec = kst->st_ctime_nsec;
> +	st->st_blksize = kst->st_blksize;
> +	st->st_blocks = kst->st_blocks;
> +	memset (&st->st_pad5, 0, sizeof (st->st_pad5));
> +	return;
> +}

You've made this a lot more complicated than it needs to be. Just
copying the __stat_fix code from 32-bit mips should work fine. The
only fixup that needs to be done is for st_dev and st_rdev and it can
be done in-place.

> +
> +#ifndef __clang__
> +
> +static inline long __syscall0(long n)
> +{
> +	register long r7 __asm__("$7");
> +	register long r2 __asm__("$2");
> +	__asm__ __volatile__ (
> +		"daddu $2,$0,%2 ; syscall"
> +		: "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7)
> +		: "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
> +		  "$14", "$15", "$24", "$25", "hi", "lo", "memory");
> +	return r7 ? -r2 : r2;
> +}
> +
> +static inline long __syscall1(long n, long a)
> +{
> +	register long r4 __asm__("$4") = a;
> +	register long r7 __asm__("$7");
> +	register long r2 __asm__("$2");
> +	__asm__ __volatile__ (
> +		"daddu $2,$0,%2 ; syscall"
> +		: "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7),
> +		  "r"(r4)
> +		: "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
> +		  "$14", "$15", "$24", "$25", "hi", "lo", "memory");
> +	return r7 ? -r2 : r2;
> +}
> +
> +static inline long __syscall2(long n, long a, long b)
> +{
> +	struct kernel_stat kst = {0,};
> +	long ret;
> +	register long r4 __asm__("$4");
> +	register long r5 __asm__("$5");
> +	register long r7 __asm__("$7");
> +	register long r2 __asm__("$2");
> +
> +	r5 = b;
> +	if (n == SYS_stat || n == SYS_fstat || n == SYS_lstat)
> +		r5 = (long) &kst;
> +
> +	r4 = a;
> +	__asm__ __volatile__ (
> +		"daddu $2,$0,%2 ; syscall"
> +		: "=&r"(r2), "=r"(r7) : "ir"(n), "0"(r2), "1"(r7),
> +		  "r"(r4), "r"(r5)
> +		: "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
> +		  "$14", "$15", "$24", "$25", "hi", "lo", "memory");
> +
> +	ret = r7 ? -r2 : r2;
> +	if (n == SYS_stat || n == SYS_fstat || n == SYS_lstat) 
> +		__stat_fix(&kst, (struct stat *)b);
> +
> +	return ret;
> +}

See the logic on 32-bit mips. There should be an early return without
performing __stat_fix in the error case (if r7 is nonzero). Otherwise
you're performing fixups on uninitialized data.

> diff --git a/ldso/dynlink.c b/ldso/dynlink.c
> index 87f3b7f..2355d74 100644
> --- a/ldso/dynlink.c
> +++ b/ldso/dynlink.c
> @@ -325,8 +325,8 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
>  	for (; rel_size; rel+=stride, rel_size-=stride*sizeof(size_t)) {
>  		if (skip_relative && IS_RELATIVE(rel[1], dso->syms)) continue;
>  		type = R_TYPE(rel[1]);
> -		if (type == REL_NONE) continue;
>  		sym_index = R_SYM(rel[1]);
> +		if (type == REL_NONE) continue;

This looks gratuitous and probably just leftover from old things you tried.

> @@ -1134,7 +1134,7 @@ static void do_mips_relocs(struct dso *p, size_t *got)
>  	Sym *sym = p->syms + j;
>  	rel[0] = (unsigned char *)got - base;
>  	for (i-=j; i; i--, sym++, rel[0]+=sizeof(size_t)) {
> -		rel[1] = sym-p->syms << 8 | R_MIPS_JUMP_SLOT;
> +		rel[1] = R_INFO(sym-p->syms, R_MIPS_JUMP_SLOT);
>  		do_relocs(p, rel, sizeof rel, 2);
>  	}
>  }

Looks ok.

> @@ -1365,7 +1365,7 @@ void __dls2(unsigned char *base, size_t *sp)
>  	size_t symbolic_rel_cnt = 0;
>  	apply_addends_to = rel;
>  	for (; rel_size; rel+=2, rel_size-=2*sizeof(size_t))
> -		if (!IS_RELATIVE(rel[1], ldso.syms)) symbolic_rel_cnt++;
> +	if (!IS_RELATIVE(rel[1], ldso.syms)) symbolic_rel_cnt++;

This looks like a non-functional change, but wrong (misleading
indention) and probably unintentional.

> diff --git a/src/internal/dynlink.h b/src/internal/dynlink.h
> index 48890b2..dcc1bde 100644
> --- a/src/internal/dynlink.h
> +++ b/src/internal/dynlink.h
> @@ -11,14 +11,49 @@ typedef Elf32_Phdr Phdr;
>  typedef Elf32_Sym Sym;
>  #define R_TYPE(x) ((x)&255)
>  #define R_SYM(x) ((x)>>8)
> +#define R_INFO	ELF32_R_INFO
>  #else
>  typedef Elf64_Ehdr Ehdr;
>  typedef Elf64_Phdr Phdr;
>  typedef Elf64_Sym Sym;
>  #define R_TYPE(x) ((x)&0x7fffffff)
>  #define R_SYM(x) ((x)>>32)
> +#define R_INFO	ELF64_R_INFO
> +#define ELF64 1
>  #endif

I don't think ELF64 is used anywhere so it can be removed.

> +#ifdef __mips64
> +#undef R_TYPE
> +#undef R_SYM
> +#undef R_INFO
> +#define R_TYPE(INFO) ({										\
> +	uint64_t r_info = (INFO);								\
> +	uint32_t *type, r_type;									\
> +	type = (((uint32_t*) &r_info) + 1);						\
> +	r_type = (uint32_t) *(((unsigned char*) type) + 3) |	\
> +	(uint32_t) *(((unsigned char*) type) + 2) << 8 |		\
> +	(uint32_t) *(((unsigned char*) type) + 1) << 16;		\
> +	r_type;													\
> +})
> +
> +#define R_SYM(INFO) ({						\
> +	uint64_t r_info = (INFO);				\
> +	uint32_t symidx;						\
> +	symidx = *(((uint32_t*) &r_info) + 0);	\
> +	symidx;									\
> +})
> +
> +#define R_INFO(SYM,TYPE1) ({					\
> +	uint32_t *pinfo;							\
> +	uint64_t r_info = 0;						\
> +	pinfo = ((uint32_t*) &r_info) + 1;			\
> +	*((uint32_t*) &r_info + 0) = (SYM);			\
> +	*((unsigned char*) pinfo + 3) = (TYPE1);	\
> +	r_info;										\
> +})
> +#endif

At the very least this is buggy (aliasing violations) and gratuitously
ugly, and might be wrong for little-endian. Have you tested it on
little?

This code above implies that the values are always stored in
big-endian order as (sym<<32 | type). If they're actually stored
native-endian (which I would expect) then the standard Elf64
definitions just work as-is and don't need replacement. If they're
always-big, tweaked versions of the standard ones that swap the byte
order of their inputs or outputs would be all that's needed.

Rich

Powered by blists - more mailing lists

Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.