|
Message-Id: <20220203193853.21511-2-mathieu.desnoyers@efficios.com> Date: Thu, 3 Feb 2022 14:38:52 -0500 From: Mathieu Desnoyers <mathieu.desnoyers@...icios.com> To: Peter Zijlstra <peterz@...radead.org> Cc: linux-kernel@...r.kernel.org, Thomas Gleixner <tglx@...utronix.de>, "Paul E . McKenney" <paulmck@...nel.org>, Boqun Feng <boqun.feng@...il.com>, "H . Peter Anvin" <hpa@...or.com>, Paul Turner <pjt@...gle.com>, linux-api@...r.kernel.org, Christian Brauner <christian.brauner@...ntu.com>, Florian Weimer <fw@...eb.enyo.de>, David.Laight@...LAB.COM, carlos@...hat.com, Peter Oskolkov <posk@...k.io>, libc-coord@...ts.openwall.com, Mathieu Desnoyers <mathieu.desnoyers@...icios.com> Subject: [RFC PATCH 2/3] rseq: Introduce extensible rseq ABI Introduce the extensible rseq ABI, where the feature size supported by the kernel and the required alignment are communicated to user-space through ELF auxiliary vectors. This allows user-space to call rseq registration with a rseq_len of either 32 bytes for the original struct rseq size (which includes padding), or larger. If rseq_len is larger than 32 bytes, then it must be large enough to contain the feature size communicated to user-space through ELF auxiliary vectors. Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@...icios.com> --- include/linux/sched.h | 4 ++++ kernel/ptrace.c | 2 +- kernel/rseq.c | 33 +++++++++++++++++++++++++++------ 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 508b91d57470..838c9e0b4cae 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1291,6 +1291,7 @@ struct task_struct { #ifdef CONFIG_RSEQ struct rseq __user *rseq; + u32 rseq_len; u32 rseq_sig; /* * RmW on rseq_event_mask must be performed atomically @@ -2260,10 +2261,12 @@ static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) { if (clone_flags & CLONE_VM) { t->rseq = NULL; + t->rseq_len = 0; t->rseq_sig = 0; t->rseq_event_mask = 0; } else { t->rseq = current->rseq; + t->rseq_len = current->rseq_len; t->rseq_sig = current->rseq_sig; t->rseq_event_mask = current->rseq_event_mask; } @@ -2272,6 +2275,7 @@ static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) static inline void rseq_execve(struct task_struct *t) { t->rseq = NULL; + t->rseq_len = 0; t->rseq_sig = 0; t->rseq_event_mask = 0; } diff --git a/kernel/ptrace.c b/kernel/ptrace.c index eea265082e97..f5edde5b7805 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -800,7 +800,7 @@ static long ptrace_get_rseq_configuration(struct task_struct *task, { struct ptrace_rseq_configuration conf = { .rseq_abi_pointer = (u64)(uintptr_t)task->rseq, - .rseq_abi_size = sizeof(*task->rseq), + .rseq_abi_size = task->rseq_len, .signature = task->rseq_sig, .flags = 0, }; diff --git a/kernel/rseq.c b/kernel/rseq.c index 97ac20b4f738..46dc5c2ce2b7 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -18,6 +18,9 @@ #define CREATE_TRACE_POINTS #include <trace/events/rseq.h> +/* The original rseq structure size (including padding) is 32 bytes. */ +#define ORIG_RSEQ_SIZE 32 + #define RSEQ_CS_PREEMPT_MIGRATE_FLAGS (RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE | \ RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT) @@ -86,10 +89,15 @@ static int rseq_update_cpu_id(struct task_struct *t) u32 cpu_id = raw_smp_processor_id(); struct rseq __user *rseq = t->rseq; - if (!user_write_access_begin(rseq, sizeof(*rseq))) + if (!user_write_access_begin(rseq, t->rseq_len)) goto efault; unsafe_put_user(cpu_id, &rseq->cpu_id_start, efault_end); unsafe_put_user(cpu_id, &rseq->cpu_id, efault_end); + /* + * Additional feature fields added after ORIG_RSEQ_SIZE + * need to be conditionally updated only if + * t->rseq_len != ORIG_RSEQ_SIZE. + */ user_write_access_end(); trace_rseq_update(t); return 0; @@ -116,6 +124,11 @@ static int rseq_reset_rseq_cpu_id(struct task_struct *t) */ if (put_user(cpu_id, &t->rseq->cpu_id)) return -EFAULT; + /* + * Additional feature fields added after ORIG_RSEQ_SIZE + * need to be conditionally reset only if + * t->rseq_len != ORIG_RSEQ_SIZE. + */ return 0; } @@ -336,7 +349,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, /* Unregister rseq for current thread. */ if (current->rseq != rseq || !current->rseq) return -EINVAL; - if (rseq_len != sizeof(*rseq)) + if (rseq_len != current->rseq_len) return -EINVAL; if (current->rseq_sig != sig) return -EPERM; @@ -345,6 +358,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, return ret; current->rseq = NULL; current->rseq_sig = 0; + current->rseq_len = 0; return 0; } @@ -357,7 +371,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, * the provided address differs from the prior * one. */ - if (current->rseq != rseq || rseq_len != sizeof(*rseq)) + if (current->rseq != rseq || rseq_len != current->rseq_len) return -EINVAL; if (current->rseq_sig != sig) return -EPERM; @@ -366,15 +380,22 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, } /* - * If there was no rseq previously registered, - * ensure the provided rseq is properly aligned and valid. + * If there was no rseq previously registered, ensure the provided rseq + * is properly aligned, as communcated to user-space through the ELF + * auxiliary vector AT_RSEQ_ALIGN. + * + * In order to be valid, rseq_len is either the original rseq size, or + * large enough to contain all supported fields, as communicated to + * user-space through the ELF auxiliary vector AT_RSEQ_FEATURE_SIZE. */ if (!IS_ALIGNED((unsigned long)rseq, __alignof__(*rseq)) || - rseq_len != sizeof(*rseq)) + rseq_len < ORIG_RSEQ_SIZE || + (rseq_len != ORIG_RSEQ_SIZE && rseq_len < offsetof(struct rseq, end))) return -EINVAL; if (!access_ok(rseq, rseq_len)) return -EFAULT; current->rseq = rseq; + current->rseq_len = rseq_len; current->rseq_sig = sig; /* * If rseq was previously inactive, and has just been -- 2.17.1
Powered by blists - more mailing lists
Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.