aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/tools/testing/selftests/rseq/rseq.c
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing/selftests/rseq/rseq.c')
-rw-r--r--tools/testing/selftests/rseq/rseq.c302
1 files changed, 227 insertions, 75 deletions
diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c
index 7159eb777fd3..663a9cef1952 100644
--- a/tools/testing/selftests/rseq/rseq.c
+++ b/tools/testing/selftests/rseq/rseq.c
@@ -26,104 +26,260 @@
#include <assert.h>
#include <signal.h>
#include <limits.h>
+#include <dlfcn.h>
+#include <stddef.h>
+#include <sys/auxv.h>
+#include <linux/auxvec.h>
+#include <linux/compiler.h>
+
+#include "../kselftest.h"
#include "rseq.h"
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+/*
+ * Define weak versions to play nice with binaries that are statically linked
+ * against a libc that doesn't support registering its own rseq.
+ */
+__weak ptrdiff_t __rseq_offset;
+__weak unsigned int __rseq_size;
+__weak unsigned int __rseq_flags;
-__thread volatile struct rseq __rseq_abi = {
- .cpu_id = RSEQ_CPU_ID_UNINITIALIZED,
-};
+static const ptrdiff_t *libc_rseq_offset_p = &__rseq_offset;
+static const unsigned int *libc_rseq_size_p = &__rseq_size;
+static const unsigned int *libc_rseq_flags_p = &__rseq_flags;
+
+/* Offset from the thread pointer to the rseq area. */
+ptrdiff_t rseq_offset;
/*
- * Shared with other libraries. This library may take rseq ownership if it is
- * still 0 when executing the library constructor. Set to 1 by library
- * constructor when handling rseq. Set to 0 in destructor if handling rseq.
+ * Size of the registered rseq area. 0 if the registration was
+ * unsuccessful.
*/
-int __rseq_handled;
+unsigned int rseq_size = -1U;
+
+/* Flags used during rseq registration. */
+unsigned int rseq_flags;
-/* Whether this library have ownership of rseq registration. */
static int rseq_ownership;
-static __thread volatile uint32_t __rseq_refcount;
+/* Allocate a large area for the TLS. */
+#define RSEQ_THREAD_AREA_ALLOC_SIZE 1024
+
+/* Original struct rseq feature size is 20 bytes. */
+#define ORIG_RSEQ_FEATURE_SIZE 20
+
+/* Original struct rseq allocation size is 32 bytes. */
+#define ORIG_RSEQ_ALLOC_SIZE 32
+
+/*
+ * Use a union to ensure we allocate a TLS area of 1024 bytes to accomodate an
+ * rseq registration that is larger than the current rseq ABI.
+ */
+union rseq_tls {
+ struct rseq_abi abi;
+ char dummy[RSEQ_THREAD_AREA_ALLOC_SIZE];
+};
+
+static
+__thread union rseq_tls __rseq __attribute__((tls_model("initial-exec"))) = {
+ .abi = {
+ .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED,
+ },
+};
-static void signal_off_save(sigset_t *oldset)
+static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len,
+ int flags, uint32_t sig)
{
- sigset_t set;
- int ret;
+ return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);
+}
- sigfillset(&set);
- ret = pthread_sigmask(SIG_BLOCK, &set, oldset);
- if (ret)
- abort();
+static int sys_getcpu(unsigned *cpu, unsigned *node)
+{
+ return syscall(__NR_getcpu, cpu, node, NULL);
}
-static void signal_restore(sigset_t oldset)
+bool rseq_available(void)
{
- int ret;
+ int rc;
- ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
- if (ret)
+ rc = sys_rseq(NULL, 0, 0, 0);
+ if (rc != -1)
abort();
+ switch (errno) {
+ case ENOSYS:
+ return false;
+ case EINVAL:
+ return true;
+ default:
+ abort();
+ }
}
-static int sys_rseq(volatile struct rseq *rseq_abi, uint32_t rseq_len,
- int flags, uint32_t sig)
+/* The rseq areas need to be at least 32 bytes. */
+static
+unsigned int get_rseq_min_alloc_size(void)
{
- return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);
+ unsigned int alloc_size = rseq_size;
+
+ if (alloc_size < ORIG_RSEQ_ALLOC_SIZE)
+ alloc_size = ORIG_RSEQ_ALLOC_SIZE;
+ return alloc_size;
+}
+
+/*
+ * Return the feature size supported by the kernel.
+ *
+ * Depending on the value returned by getauxval(AT_RSEQ_FEATURE_SIZE):
+ *
+ * 0: Return ORIG_RSEQ_FEATURE_SIZE (20)
+ * > 0: Return the value from getauxval(AT_RSEQ_FEATURE_SIZE).
+ *
+ * It should never return a value below ORIG_RSEQ_FEATURE_SIZE.
+ */
+static
+unsigned int get_rseq_kernel_feature_size(void)
+{
+ unsigned long auxv_rseq_feature_size, auxv_rseq_align;
+
+ auxv_rseq_align = getauxval(AT_RSEQ_ALIGN);
+ assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE);
+
+ auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE);
+ assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE);
+ if (auxv_rseq_feature_size)
+ return auxv_rseq_feature_size;
+ else
+ return ORIG_RSEQ_FEATURE_SIZE;
}
int rseq_register_current_thread(void)
{
- int rc, ret = 0;
- sigset_t oldset;
+ int rc;
- if (!rseq_ownership)
+ if (!rseq_ownership) {
+ /* Treat libc's ownership as a successful registration. */
return 0;
- signal_off_save(&oldset);
- if (__rseq_refcount == UINT_MAX) {
- ret = -1;
- goto end;
}
- if (__rseq_refcount++)
- goto end;
- rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG);
- if (!rc) {
- assert(rseq_current_cpu_raw() >= 0);
- goto end;
+ rc = sys_rseq(&__rseq.abi, get_rseq_min_alloc_size(), 0, RSEQ_SIG);
+ if (rc) {
+ /*
+ * After at least one thread has registered successfully
+ * (rseq_size > 0), the registration of other threads should
+ * never fail.
+ */
+ if (RSEQ_READ_ONCE(rseq_size) > 0) {
+ /* Incoherent success/failure within process. */
+ abort();
+ }
+ return -1;
}
- if (errno != EBUSY)
- __rseq_abi.cpu_id = RSEQ_CPU_ID_REGISTRATION_FAILED;
- ret = -1;
- __rseq_refcount--;
-end:
- signal_restore(oldset);
- return ret;
+ assert(rseq_current_cpu_raw() >= 0);
+
+ /*
+ * The first thread to register sets the rseq_size to mimic the libc
+ * behavior.
+ */
+ if (RSEQ_READ_ONCE(rseq_size) == 0) {
+ RSEQ_WRITE_ONCE(rseq_size, get_rseq_kernel_feature_size());
+ }
+
+ return 0;
}
int rseq_unregister_current_thread(void)
{
- int rc, ret = 0;
- sigset_t oldset;
+ int rc;
- if (!rseq_ownership)
+ if (!rseq_ownership) {
+ /* Treat libc's ownership as a successful unregistration. */
return 0;
- signal_off_save(&oldset);
- if (!__rseq_refcount) {
- ret = -1;
- goto end;
}
- if (--__rseq_refcount)
- goto end;
- rc = sys_rseq(&__rseq_abi, sizeof(struct rseq),
- RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
- if (!rc)
- goto end;
- __rseq_refcount = 1;
- ret = -1;
-end:
- signal_restore(oldset);
- return ret;
+ rc = sys_rseq(&__rseq.abi, get_rseq_min_alloc_size(), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG);
+ if (rc)
+ return -1;
+ return 0;
+}
+
+static __attribute__((constructor))
+void rseq_init(void)
+{
+ /*
+ * If the libc's registered rseq size isn't already valid, it may be
+ * because the binary is dynamically linked and not necessarily due to
+ * libc not having registered a restartable sequence. Try to find the
+ * symbols if that's the case.
+ */
+ if (!*libc_rseq_size_p) {
+ libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset");
+ libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size");
+ libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags");
+ }
+ if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p &&
+ *libc_rseq_size_p != 0) {
+ unsigned int libc_rseq_size;
+
+ /* rseq registration owned by glibc */
+ rseq_offset = *libc_rseq_offset_p;
+ libc_rseq_size = *libc_rseq_size_p;
+ rseq_flags = *libc_rseq_flags_p;
+
+ /*
+ * Previous versions of glibc expose the value
+ * 32 even though the kernel only supported 20
+ * bytes initially. Therefore treat 32 as a
+ * special-case. glibc 2.40 exposes a 20 bytes
+ * __rseq_size without using getauxval(3) to
+ * query the supported size, while still allocating a 32
+ * bytes area. Also treat 20 as a special-case.
+ *
+ * Special-cases are handled by using the following
+ * value as active feature set size:
+ *
+ * rseq_size = min(32, get_rseq_kernel_feature_size())
+ */
+ switch (libc_rseq_size) {
+ case ORIG_RSEQ_FEATURE_SIZE:
+ fallthrough;
+ case ORIG_RSEQ_ALLOC_SIZE:
+ {
+ unsigned int rseq_kernel_feature_size = get_rseq_kernel_feature_size();
+
+ if (rseq_kernel_feature_size < ORIG_RSEQ_ALLOC_SIZE)
+ rseq_size = rseq_kernel_feature_size;
+ else
+ rseq_size = ORIG_RSEQ_ALLOC_SIZE;
+ break;
+ }
+ default:
+ /* Otherwise just use the __rseq_size from libc as rseq_size. */
+ rseq_size = libc_rseq_size;
+ break;
+ }
+ return;
+ }
+ rseq_ownership = 1;
+
+ /* Calculate the offset of the rseq area from the thread pointer. */
+ rseq_offset = (void *)&__rseq.abi - rseq_thread_pointer();
+
+ /* rseq flags are deprecated, always set to 0. */
+ rseq_flags = 0;
+
+ /*
+ * Set the size to 0 until at least one thread registers to mimic the
+ * libc behavior.
+ */
+ rseq_size = 0;
+}
+
+static __attribute__((destructor))
+void rseq_exit(void)
+{
+ if (!rseq_ownership)
+ return;
+ rseq_offset = 0;
+ rseq_size = -1U;
+ rseq_ownership = 0;
}
int32_t rseq_fallback_current_cpu(void)
@@ -138,19 +294,15 @@ int32_t rseq_fallback_current_cpu(void)
return cpu;
}
-void __attribute__((constructor)) rseq_init(void)
+int32_t rseq_fallback_current_node(void)
{
- /* Check whether rseq is handled by another library. */
- if (__rseq_handled)
- return;
- __rseq_handled = 1;
- rseq_ownership = 1;
-}
+ uint32_t cpu_id, node_id;
+ int ret;
-void __attribute__((destructor)) rseq_fini(void)
-{
- if (!rseq_ownership)
- return;
- __rseq_handled = 0;
- rseq_ownership = 0;
+ ret = sys_getcpu(&cpu_id, &node_id);
+ if (ret) {
+ perror("sys_getcpu()");
+ return ret;
+ }
+ return (int32_t) node_id;
}