Follow @Openwall on Twitter for new release announcements and other news
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date: Tue, 28 Nov 2017 12:31:18 +0000
From: Nicholas Wilson <nicholas.wilson@...lvnc.com>
To: "musl@...ts.openwall.com" <musl@...ts.openwall.com>
Subject: [PATCH] Wasm support patch 2 (static syscalls)

The next patch is more complicated.

For WebAssembly, I think we'll want to be able to "statically link" in syscalls.  Under the hood, Musl is basically a layer that translates calls to the libc API into calls to an underlying syscall API - so for Wasm we want to exploit that with a twist, by allowing the interpreter environment to provide the syscall definitions.

In order for linkage to work nicely, the syscalls need to be split out into their own functions. I'll give a worked example.

Imagine an application that calls "getpid()".  This will cause "getpid.o" to be linked in, to provide the C API:

pid_t getpid(void)
{
	return __syscall(SYS_getpid);
}

On all of Musl's existing archs, the syscalls are implemented via seven generic "__syscallN" functions. The assumption is that the kernel provides all syscalls.

For Wasm, I've made it so that the interpreter environment instead provides *named* syscall functions - in this case, a "__syscall_getpid" function. Then, at link time, when the linker links against libc.a, it's able to link into the application only the syscalls that are actually used.

This keeps the changes to Musl down to an absolute minimum. We can compile all of Musl for the Wasm target, without exclusions, including pthreads, signals, I/O. All that's needed to make it link is for the application developer to provide an embedding environment that implements the named syscalls that the application uses.

It might be clearer when I post Patch #3 shortly, which contains my current stab at a minimal Wasm arch. This patch just contains the code changes needed to accommodate static syscall linkage.

There is one wrinkle I've had to work around - linking in *named* syscalls means that syscalls can't be called by number. There are only three places where this is done in Musl currently (i.e. only three places where the argument to "__syscall" is not one of the named "SYS_XXX" constants).
1. The "syscall" function itself. We can simply ifdef this out, that's easy (src/misc/syscall.c)
2. The pthread_cancel() support does some fiddly stuff. For Wasm, I've just added an ifdef again so that this isn't provided. I think that's reasonable given how messy a feature it is generally, and application developers really can't expect Wasm to provide it, surely!
3. The generic "__setxid" function that wraps the "set(r)(e)(u|g)id" functions.  I've worked around this by tweaking it to use a simple table - a bit clumsy, but I hope no-one minds too much?

Feedback welcome - this is really just to start a dialogue about how the "static syscall" model could be used in Musl.

All the best,
Nick


diff --git a/src/internal/libc.h b/src/internal/libc.h
index 5e145183..6265062d 100644
--- a/src/internal/libc.h
+++ b/src/internal/libc.h
@@ -55,6 +55,10 @@ void __unlockfile(FILE *) ATTR_LIBC_VISIBILITY;
 #define UNLOCK(x) __unlock(x)
 
 void __synccall(void (*)(void *), void *);
+
+typedef enum __xid {
+	xid_resuid, xid_resgid, xid_reuid, xid_regid, xid_uid, xid_gid
+} xid_t;
 int __setxid(int, int, int, int);
 
 extern char **__environ;
diff --git a/src/internal/syscall.h b/src/internal/syscall.h
index 6d378a81..92650c45 100644
--- a/src/internal/syscall.h
+++ b/src/internal/syscall.h
@@ -26,7 +26,19 @@ long __syscall_ret(unsigned long), __syscall(syscall_arg_t, ...),
 	__syscall_cp(syscall_arg_t, syscall_arg_t, syscall_arg_t, syscall_arg_t,
 	             syscall_arg_t, syscall_arg_t, syscall_arg_t);
 
-#ifdef SYSCALL_NO_INLINE
+#define __SYSCALL_CONCAT_X(a,b) a##b
+#define __SYSCALL_CONCAT(a,b) __SYSCALL_CONCAT_X(a,b)
+
+#ifdef SYSCALL_STATIC
+#define __syscall0(n) __SYSCALL_CONCAT(__,n)(0)
+#define __syscall1(n,a) __SYSCALL_CONCAT(__,n)(__scc(a))
+#define __syscall2(n,a,b) __SYSCALL_CONCAT(__,n)(__scc(a),__scc(b))
+#define __syscall3(n,a,b,c) __SYSCALL_CONCAT(__,n)(__scc(a),__scc(b),__scc(c))
+#define __syscall4(n,a,b,c,d) __SYSCALL_CONCAT(__,n)(__scc(a),__scc(b),__scc(c),__scc(d))
+#define __syscall5(n,a,b,c,d,e) __SYSCALL_CONCAT(__,n)(__scc(a),__scc(b),__scc(c),__scc(d),__scc(e))
+#define __syscall6(n,a,b,c,d,e,f) __SYSCALL_CONCAT(__,n)(__scc(a),__scc(b),__scc(c),__scc(d),__scc(e),__scc(f))
+#define __syscall7(n,a,b,c,d,e,f,g) __SYSCALL_CONCAT(__,n)(__scc(a),__scc(b),__scc(c),__scc(d),__scc(e),__scc(f),__scc(g))
+#elif defined SYSCALL_NO_INLINE
 #define __syscall0(n) (__syscall)(n)
 #define __syscall1(n,a) (__syscall)(n,__scc(a))
 #define __syscall2(n,a,b) (__syscall)(n,__scc(a),__scc(b))
@@ -34,20 +46,20 @@ long __syscall_ret(unsigned long), __syscall(syscall_arg_t, ...),
 #define __syscall4(n,a,b,c,d) (__syscall)(n,__scc(a),__scc(b),__scc(c),__scc(d))
 #define __syscall5(n,a,b,c,d,e) (__syscall)(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e))
 #define __syscall6(n,a,b,c,d,e,f) (__syscall)(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e),__scc(f))
+#define __syscall7(n,a,b,c,d,e,f,g) (__syscall)(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e),__scc(f),__scc(g))
 #else
+#define __syscall0(n) __syscall0(n)
 #define __syscall1(n,a) __syscall1(n,__scc(a))
 #define __syscall2(n,a,b) __syscall2(n,__scc(a),__scc(b))
 #define __syscall3(n,a,b,c) __syscall3(n,__scc(a),__scc(b),__scc(c))
 #define __syscall4(n,a,b,c,d) __syscall4(n,__scc(a),__scc(b),__scc(c),__scc(d))
 #define __syscall5(n,a,b,c,d,e) __syscall5(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e))
 #define __syscall6(n,a,b,c,d,e,f) __syscall6(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e),__scc(f))
+#define __syscall7(n,a,b,c,d,e,f,g) __syscall7(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e),__scc(f),__scc(g))
 #endif
-#define __syscall7(n,a,b,c,d,e,f,g) (__syscall)(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e),__scc(f),__scc(g))
 
 #define __SYSCALL_NARGS_X(a,b,c,d,e,f,g,h,n,...) n
 #define __SYSCALL_NARGS(...) __SYSCALL_NARGS_X(__VA_ARGS__,7,6,5,4,3,2,1,0,)
-#define __SYSCALL_CONCAT_X(a,b) a##b
-#define __SYSCALL_CONCAT(a,b) __SYSCALL_CONCAT_X(a,b)
 #define __SYSCALL_DISP(b,...) __SYSCALL_CONCAT(b,__SYSCALL_NARGS(__VA_ARGS__))(__VA_ARGS__)
 
 #define __syscall(...) __SYSCALL_DISP(__syscall,__VA_ARGS__)
@@ -56,6 +68,18 @@ long __syscall_ret(unsigned long), __syscall(syscall_arg_t, ...),
 #define socketcall __socketcall
 #define socketcall_cp __socketcall_cp
 
+#ifdef SYSCALL_STATIC
+// For archs that define SYSCALL_STATIC (wasm), we basically just don't allow
+// for pthread_cancel().  I don't expect wasm will ever allow for cancellable
+// waits so that's OK.
+#define __syscall_cp0(n) __syscall0(n)
+#define __syscall_cp1(n,a) __syscall1(n,a)
+#define __syscall_cp2(n,a,b) __syscall2(n,a,b)
+#define __syscall_cp3(n,a,b,c) __syscall3(n,a,b,c)
+#define __syscall_cp4(n,a,b,c,d) __syscall4(n,a,b,c,d)
+#define __syscall_cp5(n,a,b,c,d,e) __syscall5(n,a,b,c,d,e)
+#define __syscall_cp6(n,a,b,c,d,e,f) __syscall6(n,a,b,c,d,e,f)
+#else
 #define __syscall_cp0(n) (__syscall_cp)(n,0,0,0,0,0,0)
 #define __syscall_cp1(n,a) (__syscall_cp)(n,__scc(a),0,0,0,0,0)
 #define __syscall_cp2(n,a,b) (__syscall_cp)(n,__scc(a),__scc(b),0,0,0,0)
@@ -63,6 +87,7 @@ long __syscall_ret(unsigned long), __syscall(syscall_arg_t, ...),
 #define __syscall_cp4(n,a,b,c,d) (__syscall_cp)(n,__scc(a),__scc(b),__scc(c),__scc(d),0,0)
 #define __syscall_cp5(n,a,b,c,d,e) (__syscall_cp)(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e),0)
 #define __syscall_cp6(n,a,b,c,d,e,f) (__syscall_cp)(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e),__scc(f))
+#endif
 
 #define __syscall_cp(...) __SYSCALL_DISP(__syscall_cp,__VA_ARGS__)
 #define syscall_cp(...) __syscall_ret(__syscall_cp(__VA_ARGS__))
diff --git a/src/misc/syscall.c b/src/misc/syscall.c
index 9d435a97..013787a4 100644
--- a/src/misc/syscall.c
+++ b/src/misc/syscall.c
@@ -3,6 +3,7 @@
 
 #undef syscall
 
+#ifndef SYSCALL_STATIC
 long syscall(long n, ...)
 {
 	va_list ap;
@@ -17,3 +18,4 @@ long syscall(long n, ...)
 	va_end(ap);
 	return __syscall_ret(__syscall(n,a,b,c,d,e,f));
 }
+#endif
diff --git a/src/thread/pthread_cancel.c b/src/thread/pthread_cancel.c
index 3d229223..d14d96ed 100644
--- a/src/thread/pthread_cancel.c
+++ b/src/thread/pthread_cancel.c
@@ -20,6 +20,7 @@ long __syscall_cp_asm(volatile void *, syscall_arg_t,
                       syscall_arg_t, syscall_arg_t, syscall_arg_t,
                       syscall_arg_t, syscall_arg_t, syscall_arg_t);
 
+#ifndef SYSCALL_STATIC
 long __syscall_cp_c(syscall_arg_t nr,
                     syscall_arg_t u, syscall_arg_t v, syscall_arg_t w,
                     syscall_arg_t x, syscall_arg_t y, syscall_arg_t z)
@@ -38,6 +39,7 @@ long __syscall_cp_c(syscall_arg_t nr,
 		r = __cancel();
 	return r;
 }
+#endif
 
 static void _sigaddset(sigset_t *set, int sig)
 {
diff --git a/src/unistd/setegid.c b/src/unistd/setegid.c
index e6da2573..dc2702b6 100644
--- a/src/unistd/setegid.c
+++ b/src/unistd/setegid.c
@@ -1,8 +1,7 @@
 #include <unistd.h>
 #include "libc.h"
-#include "syscall.h"
 
 int setegid(gid_t egid)
 {
-	return __setxid(SYS_setresgid, -1, egid, -1);
+	return __setxid(xid_resgid, -1, egid, -1);
 }
diff --git a/src/unistd/seteuid.c b/src/unistd/seteuid.c
index ef8b9df4..d84d6186 100644
--- a/src/unistd/seteuid.c
+++ b/src/unistd/seteuid.c
@@ -1,8 +1,7 @@
 #include <unistd.h>
-#include "syscall.h"
 #include "libc.h"
 
 int seteuid(uid_t euid)
 {
-	return __setxid(SYS_setresuid, -1, euid, -1);
+	return __setxid(xid_resuid, -1, euid, -1);
 }
diff --git a/src/unistd/setgid.c b/src/unistd/setgid.c
index bae4616a..88197ec1 100644
--- a/src/unistd/setgid.c
+++ b/src/unistd/setgid.c
@@ -1,8 +1,7 @@
 #include <unistd.h>
-#include "syscall.h"
 #include "libc.h"
 
 int setgid(gid_t gid)
 {
-	return __setxid(SYS_setgid, gid, 0, 0);
+	return __setxid(xid_gid, gid, 0, 0);
 }
diff --git a/src/unistd/setregid.c b/src/unistd/setregid.c
index f5a8972a..4729c31b 100644
--- a/src/unistd/setregid.c
+++ b/src/unistd/setregid.c
@@ -1,8 +1,7 @@
 #include <unistd.h>
-#include "syscall.h"
 #include "libc.h"
 
 int setregid(gid_t rgid, gid_t egid)
 {
-	return __setxid(SYS_setregid, rgid, egid, 0);
+	return __setxid(xid_regid, rgid, egid, 0);
 }
diff --git a/src/unistd/setresgid.c b/src/unistd/setresgid.c
index b9af540a..929bb72b 100644
--- a/src/unistd/setresgid.c
+++ b/src/unistd/setresgid.c
@@ -1,9 +1,8 @@
 #define _GNU_SOURCE
 #include <unistd.h>
-#include "syscall.h"
 #include "libc.h"
 
 int setresgid(gid_t rgid, gid_t egid, gid_t sgid)
 {
-	return __setxid(SYS_setresgid, rgid, egid, sgid);
+	return __setxid(xid_resgid, rgid, egid, sgid);
 }
diff --git a/src/unistd/setresuid.c b/src/unistd/setresuid.c
index 83692b4c..74b6ac33 100644
--- a/src/unistd/setresuid.c
+++ b/src/unistd/setresuid.c
@@ -1,9 +1,8 @@
 #define _GNU_SOURCE
 #include <unistd.h>
-#include "syscall.h"
 #include "libc.h"
 
 int setresuid(uid_t ruid, uid_t euid, uid_t suid)
 {
-	return __setxid(SYS_setresuid, ruid, euid, suid);
+	return __setxid(xid_resuid, ruid, euid, suid);
 }
diff --git a/src/unistd/setreuid.c b/src/unistd/setreuid.c
index 3fcc59e2..9b3efd6a 100644
--- a/src/unistd/setreuid.c
+++ b/src/unistd/setreuid.c
@@ -1,8 +1,7 @@
 #include <unistd.h>
-#include "syscall.h"
 #include "libc.h"
 
 int setreuid(uid_t ruid, uid_t euid)
 {
-	return __setxid(SYS_setreuid, ruid, euid, 0);
+	return __setxid(xid_reuid, ruid, euid, 0);
 }
diff --git a/src/unistd/setuid.c b/src/unistd/setuid.c
index 602ecbbf..de91d0c1 100644
--- a/src/unistd/setuid.c
+++ b/src/unistd/setuid.c
@@ -1,8 +1,7 @@
 #include <unistd.h>
-#include "syscall.h"
 #include "libc.h"
 
 int setuid(uid_t uid)
 {
-	return __setxid(SYS_setuid, uid, 0, 0);
+	return __setxid(xid_uid, uid, 0, 0);
 }
diff --git a/src/unistd/setxid.c b/src/unistd/setxid.c
index 0239f8af..6e11f507 100644
--- a/src/unistd/setxid.c
+++ b/src/unistd/setxid.c
@@ -6,14 +6,25 @@
 
 struct ctx {
 	int id, eid, sid;
-	int nr, err;
+	int xid, err;
 };
 
+static int setxid_syscall(int xid, int id, int eid, int sid)
+{
+	if (xid == xid_resuid) return __syscall(SYS_setresuid, id, eid, sid);
+	if (xid == xid_resgid) return __syscall(SYS_setresgid, id, eid, sid);
+	if (xid == xid_reuid) return __syscall(SYS_setreuid, id, eid);
+	if (xid == xid_regid) return __syscall(SYS_setregid, id, eid);
+	if (xid == xid_uid) return __syscall(SYS_setuid, id);
+	if (xid == xid_gid) return __syscall(SYS_setgid, id);
+	abort();
+}
+
 static void do_setxid(void *p)
 {
 	struct ctx *c = p;
 	if (c->err>0) return;
-	int ret = -__syscall(c->nr, c->id, c->eid, c->sid);
+	int ret = -setxid_syscall(c->xid, c->id, c->eid, c->sid);
 	if (ret && !c->err) {
 		/* If one thread fails to set ids after another has already
 		 * succeeded, forcibly killing the process is the only safe
@@ -25,11 +36,11 @@ static void do_setxid(void *p)
 	c->err = ret;
 }
 
-int __setxid(int nr, int id, int eid, int sid)
+int __setxid(int xid, int id, int eid, int sid)
 {
 	/* err is initially nonzero so that failure of the first thread does not
 	 * trigger the safety kill above. */
-	struct ctx c = { .nr = nr, .id = id, .eid = eid, .sid = sid, .err = -1 };
+	struct ctx c = { .xid = xid, .id = id, .eid = eid, .sid = sid, .err = -1 };
 	__synccall(do_setxid, &c);
 	if (c.err) {
 		if (c.err>0) errno = c.err;

Powered by blists - more mailing lists

Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.