From e96d71359e9bbea846a2111e4469a03a055dfa6f Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 9 Jul 2018 15:51:50 -0400 Subject: rseq: Use __u64 for rseq_cs fields, validate user inputs Change the rseq ABI so rseq_cs start_ip, post_commit_offset and abort_ip fields are seen as 64-bit fields by both 32-bit and 64-bit kernels rather than ignoring the 32 upper bits on 32-bit kernels. This ensures we have a consistent behavior for a 32-bit binary executed on 32-bit kernels and in compat mode on 64-bit kernels. Validating the value of abort_ip field to be below TASK_SIZE ensures the kernel doesn't return to an invalid address when returning to userspace after an abort. I don't fully trust each architecture code to consistently deal with invalid return addresses. Validating the value of the start_ip and post_commit_offset fields prevents overflow on arithmetic performed on those values, used to check whether abort_ip is within the rseq critical section. If validation fails, the process is killed with a segmentation fault. When the signature encountered before abort_ip does not match the expected signature, return -EINVAL rather than -EPERM to be consistent with other input validation return codes from rseq_get_rseq_cs(). Signed-off-by: Mathieu Desnoyers Signed-off-by: Thomas Gleixner Cc: linux-api@vger.kernel.org Cc: Peter Zijlstra Cc: "Paul E . McKenney" Cc: Boqun Feng Cc: Andy Lutomirski Cc: Dave Watson Cc: Paul Turner Cc: Andrew Morton Cc: Russell King Cc: "H . Peter Anvin" Cc: Andi Kleen Cc: Chris Lameter Cc: Ben Maurer Cc: Steven Rostedt Cc: Josh Triplett Cc: Linus Torvalds Cc: Catalin Marinas Cc: Will Deacon Cc: Michael Kerrisk Cc: Joel Fernandes Cc: "Paul E. McKenney" Cc: "H. 
Peter Anvin" Link: https://lkml.kernel.org/r/20180709195155.7654-2-mathieu.desnoyers@efficios.com --- include/uapi/linux/rseq.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h index d620fa43756c..519ad6e176d1 100644 --- a/include/uapi/linux/rseq.h +++ b/include/uapi/linux/rseq.h @@ -52,10 +52,10 @@ struct rseq_cs { __u32 version; /* enum rseq_cs_flags */ __u32 flags; - LINUX_FIELD_u32_u64(start_ip); + __u64 start_ip; /* Offset from start_ip. */ - LINUX_FIELD_u32_u64(post_commit_offset); - LINUX_FIELD_u32_u64(abort_ip); + __u64 post_commit_offset; + __u64 abort_ip; } __attribute__((aligned(4 * sizeof(__u64)))); /* -- cgit v1.2.3-59-g8ed1b From 0fb9a1abc8c97f858997e962694eb36b4517144e Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 9 Jul 2018 15:51:52 -0400 Subject: rseq: uapi: Update uapi comments Update rseq uapi header comments to reflect that user-space needs to do thread-local loads/stores from/to the struct rseq fields. As a consequence of this added requirement, the kernel does not need to perform loads/stores with single-copy atomicity. Update the comment associated with the "flags" field to describe more accurately that it's only useful to facilitate single-stepping through rseq critical sections with debuggers. Signed-off-by: Mathieu Desnoyers Signed-off-by: Thomas Gleixner Cc: linux-api@vger.kernel.org Cc: Peter Zijlstra Cc: "Paul E . McKenney" Cc: Boqun Feng Cc: Andy Lutomirski Cc: Dave Watson Cc: Paul Turner Cc: Andrew Morton Cc: Russell King Cc: "H . Peter Anvin" Cc: Andi Kleen Cc: Chris Lameter Cc: Ben Maurer Cc: Steven Rostedt Cc: Josh Triplett Cc: Linus Torvalds Cc: Catalin Marinas Cc: Will Deacon Cc: Michael Kerrisk Cc: Joel Fernandes Cc: "Paul E. McKenney" Cc: "H. 
Peter Anvin" Link: https://lkml.kernel.org/r/20180709195155.7654-4-mathieu.desnoyers@efficios.com --- include/uapi/linux/rseq.h | 69 ++++++++++++++++++++++++----------------------- kernel/rseq.c | 2 +- 2 files changed, 37 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h index 519ad6e176d1..bf4188c13bec 100644 --- a/include/uapi/linux/rseq.h +++ b/include/uapi/linux/rseq.h @@ -67,28 +67,30 @@ struct rseq_cs { struct rseq { /* * Restartable sequences cpu_id_start field. Updated by the - * kernel, and read by user-space with single-copy atomicity - * semantics. Aligned on 32-bit. Always contains a value in the - * range of possible CPUs, although the value may not be the - * actual current CPU (e.g. if rseq is not initialized). This - * CPU number value should always be compared against the value - * of the cpu_id field before performing a rseq commit or - * returning a value read from a data structure indexed using - * the cpu_id_start value. + * kernel. Read by user-space with single-copy atomicity + * semantics. This field should only be read by the thread which + * registered this data structure. Aligned on 32-bit. Always + * contains a value in the range of possible CPUs, although the + * value may not be the actual current CPU (e.g. if rseq is not + * initialized). This CPU number value should always be compared + * against the value of the cpu_id field before performing a rseq + * commit or returning a value read from a data structure indexed + * using the cpu_id_start value. */ __u32 cpu_id_start; /* - * Restartable sequences cpu_id field. Updated by the kernel, - * and read by user-space with single-copy atomicity semantics. - * Aligned on 32-bit. Values RSEQ_CPU_ID_UNINITIALIZED and - * RSEQ_CPU_ID_REGISTRATION_FAILED have a special semantic: the - * former means "rseq uninitialized", and latter means "rseq - * initialization failed". 
This value is meant to be read within - * rseq critical sections and compared with the cpu_id_start - * value previously read, before performing the commit instruction, - * or read and compared with the cpu_id_start value before returning - * a value loaded from a data structure indexed using the - * cpu_id_start value. + * Restartable sequences cpu_id field. Updated by the kernel. + * Read by user-space with single-copy atomicity semantics. This + * field should only be read by the thread which registered this + * data structure. Aligned on 32-bit. Values + * RSEQ_CPU_ID_UNINITIALIZED and RSEQ_CPU_ID_REGISTRATION_FAILED + * have a special semantic: the former means "rseq uninitialized", + * and latter means "rseq initialization failed". This value is + * meant to be read within rseq critical sections and compared + * with the cpu_id_start value previously read, before performing + * the commit instruction, or read and compared with the + * cpu_id_start value before returning a value loaded from a data + * structure indexed using the cpu_id_start value. */ __u32 cpu_id; /* @@ -105,27 +107,28 @@ struct rseq { * targeted by the rseq_cs. Also needs to be set to NULL by user-space * before reclaiming memory that contains the targeted struct rseq_cs. * - * Read and set by the kernel with single-copy atomicity semantics. - * Set by user-space with single-copy atomicity semantics. Aligned - * on 64-bit. + * Read and set by the kernel. Set by user-space with single-copy + * atomicity semantics. This field should only be updated by the + * thread which registered this data structure. Aligned on 64-bit. */ LINUX_FIELD_u32_u64(rseq_cs); /* - * - RSEQ_DISABLE flag: + * Restartable sequences flags field. + * + * This field should only be updated by the thread which + * registered this data structure. Read by the kernel. + * Mainly used for single-stepping through rseq critical sections + * with debuggers. * - * Fallback fast-track flag for single-stepping. 
- * Set by user-space if lack of progress is detected. - * Cleared by user-space after rseq finish. - * Read by the kernel. * - RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT - * Inhibit instruction sequence block restart and event - * counter increment on preemption for this thread. + * Inhibit instruction sequence block restart on preemption + * for this thread. * - RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL - * Inhibit instruction sequence block restart and event - * counter increment on signal delivery for this thread. + * Inhibit instruction sequence block restart on signal + * delivery for this thread. * - RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE - * Inhibit instruction sequence block restart and event - * counter increment on migration for this thread. + * Inhibit instruction sequence block restart on migration for + * this thread. */ __u32 flags; } __attribute__((aligned(4 * sizeof(__u64)))); diff --git a/kernel/rseq.c b/kernel/rseq.c index 2c8463acb50d..2a7748675be7 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -201,7 +201,7 @@ static int clear_rseq_cs(struct task_struct *t) * of code outside of the rseq assembly block. This performs * a lazy clear of the rseq_cs field. * - * Set rseq_cs to NULL with single-copy atomicity. + * Set rseq_cs to NULL. */ return put_user(0UL, &t->rseq->rseq_cs); } -- cgit v1.2.3-59-g8ed1b From ec9c82e03a744e5698bd95eab872855861a821fa Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 9 Jul 2018 15:51:53 -0400 Subject: rseq: uapi: Declare rseq_cs field as union, update includes Declaring the rseq_cs field as a union between __u64 and two __u32 allows both 32-bit and 64-bit kernels to read the full __u64, and therefore validate that a 32-bit user-space cleared the upper 32 bits, thus ensuring a consistent behavior between native 32-bit kernels and 32-bit compat tasks on 64-bit kernels. Check that the rseq_cs value read is < TASK_SIZE. 
The asm/byteorder.h header needs to be included by rseq.h, now that it is not using linux/types_32_64.h anymore. Considering that only __u32 and __u64 types are declared in linux/rseq.h, the linux/types.h header should always be included for both kernel and user-space code: including stdint.h is just for u64 and u32, which are not used in this header at all. Use copy_from_user()/clear_user() to interact with a 64-bit field, because arm32 does not implement 64-bit __get_user, and ppc32 does not implement 64-bit get_user. Considering that the rseq_cs pointer does not need to be loaded/stored with single-copy atomicity from the kernel anymore, we can simply use copy_from_user()/clear_user(). Signed-off-by: Mathieu Desnoyers Signed-off-by: Thomas Gleixner Cc: linux-api@vger.kernel.org Cc: Peter Zijlstra Cc: "Paul E . McKenney" Cc: Boqun Feng Cc: Andy Lutomirski Cc: Dave Watson Cc: Paul Turner Cc: Andrew Morton Cc: Russell King Cc: "H . Peter Anvin" Cc: Andi Kleen Cc: Chris Lameter Cc: Ben Maurer Cc: Steven Rostedt Cc: Josh Triplett Cc: Linus Torvalds Cc: Catalin Marinas Cc: Will Deacon Cc: Michael Kerrisk Cc: Joel Fernandes Cc: "Paul E. McKenney" Cc: "H. Peter Anvin" Link: https://lkml.kernel.org/r/20180709195155.7654-5-mathieu.desnoyers@efficios.com --- include/uapi/linux/rseq.h | 27 +++++++++++++++++++-------- kernel/rseq.c | 15 +++++++++------ tools/testing/selftests/rseq/rseq.h | 11 ++++++++++- 3 files changed, 38 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h index bf4188c13bec..9a402fdb60e9 100644 --- a/include/uapi/linux/rseq.h +++ b/include/uapi/linux/rseq.h @@ -10,13 +10,8 @@ * Copyright (c) 2015-2018 Mathieu Desnoyers */ -#ifdef __KERNEL__ -# include <linux/types.h> -#else -# include <stdint.h> -#endif - -#include <linux/types_32_64.h> +#include <linux/types.h> +#include <asm/byteorder.h> enum rseq_cpu_id_state { RSEQ_CPU_ID_UNINITIALIZED = -1, @@ -111,7 +106,23 @@ struct rseq { * atomicity semantics. 
This field should only be updated by the * thread which registered this data structure. Aligned on 64-bit. */ - LINUX_FIELD_u32_u64(rseq_cs); + union { + __u64 ptr64; +#ifdef __LP64__ + __u64 ptr; +#else + struct { +#if (defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN)) || defined(__BIG_ENDIAN) + __u32 padding; /* Initialized to zero. */ + __u32 ptr32; +#else /* LITTLE */ + __u32 ptr32; + __u32 padding; /* Initialized to zero. */ +#endif /* ENDIAN */ + } ptr; +#endif + } rseq_cs; + /* * Restartable sequences flags field. * diff --git a/kernel/rseq.c b/kernel/rseq.c index 2a7748675be7..c6242d8594dc 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -115,19 +115,20 @@ static int rseq_reset_rseq_cpu_id(struct task_struct *t) static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs) { struct rseq_cs __user *urseq_cs; - unsigned long ptr; + u64 ptr; u32 __user *usig; u32 sig; int ret; - ret = get_user(ptr, &t->rseq->rseq_cs); - if (ret) - return ret; + if (copy_from_user(&ptr, &t->rseq->rseq_cs.ptr64, sizeof(ptr))) + return -EFAULT; if (!ptr) { memset(rseq_cs, 0, sizeof(*rseq_cs)); return 0; } - urseq_cs = (struct rseq_cs __user *)ptr; + if (ptr >= TASK_SIZE) + return -EINVAL; + urseq_cs = (struct rseq_cs __user *)(unsigned long)ptr; if (copy_from_user(rseq_cs, urseq_cs, sizeof(*rseq_cs))) return -EFAULT; @@ -203,7 +204,9 @@ static int clear_rseq_cs(struct task_struct *t) * * Set rseq_cs to NULL. 
*/ - return put_user(0UL, &t->rseq->rseq_cs); + if (clear_user(&t->rseq->rseq_cs.ptr64, sizeof(t->rseq->rseq_cs.ptr64))) + return -EFAULT; + return 0; } /* diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h index a4684112676c..f2073cfa4448 100644 --- a/tools/testing/selftests/rseq/rseq.h +++ b/tools/testing/selftests/rseq/rseq.h @@ -133,6 +133,15 @@ static inline uint32_t rseq_current_cpu(void) return cpu; } +static inline void rseq_clear_rseq_cs(void) +{ +#ifdef __LP64__ + __rseq_abi.rseq_cs.ptr = 0; +#else + __rseq_abi.rseq_cs.ptr.ptr32 = 0; +#endif +} + /* * rseq_prepare_unload() should be invoked by each thread using rseq_finish*() * at least once between their last rseq_finish*() and library unload of the @@ -143,7 +152,7 @@ static inline uint32_t rseq_current_cpu(void) */ static inline void rseq_prepare_unload(void) { - __rseq_abi.rseq_cs = 0; + rseq_clear_rseq_cs(); } #endif /* RSEQ_H_ */ -- cgit v1.2.3-59-g8ed1b From 4f4c0acdf4652a964da869d578a3c8bf6df14ce2 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 9 Jul 2018 15:51:54 -0400 Subject: rseq: Remove unused types_32_64.h uapi header This header was introduced in the 4.18 merge window, and rseq does not need it anymore. Nuke it before the final release. Signed-off-by: Mathieu Desnoyers Signed-off-by: Thomas Gleixner Cc: linux-api@vger.kernel.org Cc: Peter Zijlstra Cc: "Paul E . McKenney" Cc: Boqun Feng Cc: Andy Lutomirski Cc: Dave Watson Cc: Paul Turner Cc: Andrew Morton Cc: Russell King Cc: "H . Peter Anvin" Cc: Andi Kleen Cc: Chris Lameter Cc: Ben Maurer Cc: Steven Rostedt Cc: Josh Triplett Cc: Linus Torvalds Cc: Catalin Marinas Cc: Will Deacon Cc: Michael Kerrisk Cc: Joel Fernandes Cc: "Paul E. McKenney" Cc: "H. 
Peter Anvin" Link: https://lkml.kernel.org/r/20180709195155.7654-6-mathieu.desnoyers@efficios.com --- include/uapi/linux/types_32_64.h | 50 ---------------------------------------- 1 file changed, 50 deletions(-) delete mode 100644 include/uapi/linux/types_32_64.h (limited to 'include') diff --git a/include/uapi/linux/types_32_64.h b/include/uapi/linux/types_32_64.h deleted file mode 100644 index 0a87ace34a57..000000000000 --- a/include/uapi/linux/types_32_64.h +++ /dev/null @@ -1,50 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ -#ifndef _UAPI_LINUX_TYPES_32_64_H -#define _UAPI_LINUX_TYPES_32_64_H - -/* - * linux/types_32_64.h - * - * Integer type declaration for pointers across 32-bit and 64-bit systems. - * - * Copyright (c) 2015-2018 Mathieu Desnoyers - */ - -#ifdef __KERNEL__ -# include <linux/types.h> -#else -# include <stdint.h> -#endif - -#include <asm/byteorder.h> - -#ifdef __BYTE_ORDER -# if (__BYTE_ORDER == __BIG_ENDIAN) -# define LINUX_BYTE_ORDER_BIG_ENDIAN -# else -# define LINUX_BYTE_ORDER_LITTLE_ENDIAN -# endif -#else -# ifdef __BIG_ENDIAN -# define LINUX_BYTE_ORDER_BIG_ENDIAN -# else -# define LINUX_BYTE_ORDER_LITTLE_ENDIAN -# endif -#endif - -#ifdef __LP64__ -# define LINUX_FIELD_u32_u64(field) __u64 field -# define LINUX_FIELD_u32_u64_INIT_ONSTACK(field, v) field = (intptr_t)v -#else -# ifdef LINUX_BYTE_ORDER_BIG_ENDIAN -# define LINUX_FIELD_u32_u64(field) __u32 field ## _padding, field -# define LINUX_FIELD_u32_u64_INIT_ONSTACK(field, v) \ - field ## _padding = 0, field = (intptr_t)v -# else -# define LINUX_FIELD_u32_u64(field) __u32 field, field ## _padding -# define LINUX_FIELD_u32_u64_INIT_ONSTACK(field, v) \ - field = (intptr_t)v, field ## _padding = 0 -# endif -#endif - -#endif /* _UAPI_LINUX_TYPES_32_64_H */ -- cgit v1.2.3-59-g8ed1b