aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile2
-rw-r--r--kernel/bpf/stackmap.c2
-rw-r--r--kernel/dma/remap.c2
-rw-r--r--kernel/fork.c2
-rwxr-xr-xkernel/gen_kheaders.sh64
-rw-r--r--kernel/irq/chip.c44
-rw-r--r--kernel/irq/irqdesc.c2
-rw-r--r--kernel/irq_work.c34
-rw-r--r--kernel/kcov.c547
-rw-r--r--kernel/module.c4
-rw-r--r--kernel/notifier.c41
-rw-r--r--kernel/power/qos.c4
-rw-r--r--kernel/printk/printk.c2
-rw-r--r--kernel/profile.c6
-rw-r--r--kernel/sched/cpufreq_schedutil.c2
-rw-r--r--kernel/sys.c4
-rw-r--r--kernel/time/hrtimer.c11
-rw-r--r--kernel/time/time.c26
-rw-r--r--kernel/trace/Kconfig9
-rw-r--r--kernel/trace/Makefile1
-rw-r--r--kernel/trace/bpf_trace.c2
-rw-r--r--kernel/trace/ring_buffer.c6
-rw-r--r--kernel/trace/trace.c17
-rw-r--r--kernel/trace/trace.h1
-rw-r--r--kernel/trace/trace_events.c6
-rw-r--r--kernel/trace/trace_events_inject.c331
26 files changed, 1022 insertions, 150 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index f0902a7bd1b3..f2cc0d118a0b 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -130,7 +130,7 @@ $(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE
$(obj)/kheaders.o: $(obj)/kheaders_data.tar.xz
quiet_cmd_genikh = CHK $(obj)/kheaders_data.tar.xz
- cmd_genikh = $(BASH) $(srctree)/kernel/gen_kheaders.sh $@
+ cmd_genikh = $(CONFIG_SHELL) $(srctree)/kernel/gen_kheaders.sh $@
$(obj)/kheaders_data.tar.xz: FORCE
$(call cmd,genikh)
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index caca752ee5e6..3f958b90d914 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -289,7 +289,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
if (irqs_disabled()) {
work = this_cpu_ptr(&up_read_work);
- if (work->irq_work.flags & IRQ_WORK_BUSY)
+ if (atomic_read(&work->irq_work.flags) & IRQ_WORK_BUSY)
/* cannot queue more up_read, fallback */
irq_work_busy = true;
}
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
index d47bd40fc0f5..d14cbc83986a 100644
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -178,7 +178,7 @@ bool dma_in_atomic_pool(void *start, size_t size)
if (unlikely(!atomic_pool))
return false;
- return addr_in_gen_pool(atomic_pool, (unsigned long)start, size);
+ return gen_pool_has_addr(atomic_pool, (unsigned long)start, size);
}
void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
diff --git a/kernel/fork.c b/kernel/fork.c
index 21c6c1e29b98..2508a4f238a3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2185,7 +2185,7 @@ static __latent_entropy struct task_struct *copy_process(
*/
p->start_time = ktime_get_ns();
- p->real_start_time = ktime_get_boottime_ns();
+ p->start_boottime = ktime_get_boottime_ns();
/*
* Make it visible to the rest of the system, but dont wake it up yet.
diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh
index 5a0fc0b0403a..e13ca842eb7e 100755
--- a/kernel/gen_kheaders.sh
+++ b/kernel/gen_kheaders.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# This script generates an archive consisting of kernel headers
@@ -21,30 +21,38 @@ arch/$SRCARCH/include/
# Uncomment it for debugging.
# if [ ! -f /tmp/iter ]; then iter=1; echo 1 > /tmp/iter;
# else iter=$(($(cat /tmp/iter) + 1)); echo $iter > /tmp/iter; fi
-# find $src_file_list -name "*.h" | xargs ls -l > /tmp/src-ls-$iter
-# find $obj_file_list -name "*.h" | xargs ls -l > /tmp/obj-ls-$iter
+# find $all_dirs -name "*.h" | xargs ls -l > /tmp/ls-$iter
+
+all_dirs=
+if [ "$building_out_of_srctree" ]; then
+ for d in $dir_list; do
+ all_dirs="$all_dirs $srctree/$d"
+ done
+fi
+all_dirs="$all_dirs $dir_list"
# include/generated/compile.h is ignored because it is touched even when none
-# of the source files changed. This causes pointless regeneration, so let us
-# ignore them for md5 calculation.
-pushd $srctree > /dev/null
-src_files_md5="$(find $dir_list -name "*.h" |
- grep -v "include/generated/compile.h" |
- grep -v "include/generated/autoconf.h" |
- xargs ls -l | md5sum | cut -d ' ' -f1)"
-popd > /dev/null
-obj_files_md5="$(find $dir_list -name "*.h" |
- grep -v "include/generated/compile.h" |
- grep -v "include/generated/autoconf.h" |
+# of the source files changed.
+#
+# When Kconfig regenerates include/generated/autoconf.h, its timestamp is
+# updated, but the contents might be still the same. When any CONFIG option is
+# changed, Kconfig touches the corresponding timestamp file include/config/*.h.
+# Hence, the md5sum detects the configuration change anyway. We do not need to
+# check include/generated/autoconf.h explicitly.
+#
+# Ignore them for md5 calculation to avoid pointless regeneration.
+headers_md5="$(find $all_dirs -name "*.h" |
+ grep -v "include/generated/compile.h" |
+ grep -v "include/generated/autoconf.h" |
xargs ls -l | md5sum | cut -d ' ' -f1)"
+
# Any changes to this script will also cause a rebuild of the archive.
this_file_md5="$(ls -l $sfile | md5sum | cut -d ' ' -f1)"
if [ -f $tarfile ]; then tarfile_md5="$(md5sum $tarfile | cut -d ' ' -f1)"; fi
if [ -f kernel/kheaders.md5 ] &&
- [ "$(cat kernel/kheaders.md5|head -1)" == "$src_files_md5" ] &&
- [ "$(cat kernel/kheaders.md5|head -2|tail -1)" == "$obj_files_md5" ] &&
- [ "$(cat kernel/kheaders.md5|head -3|tail -1)" == "$this_file_md5" ] &&
- [ "$(cat kernel/kheaders.md5|tail -1)" == "$tarfile_md5" ]; then
+ [ "$(head -n 1 kernel/kheaders.md5)" = "$headers_md5" ] &&
+ [ "$(head -n 2 kernel/kheaders.md5 | tail -n 1)" = "$this_file_md5" ] &&
+ [ "$(tail -n 1 kernel/kheaders.md5)" = "$tarfile_md5" ]; then
exit
fi
@@ -55,14 +63,17 @@ fi
rm -rf $cpio_dir
mkdir $cpio_dir
-pushd $srctree > /dev/null
-for f in $dir_list;
- do find "$f" -name "*.h";
-done | cpio --quiet -pd $cpio_dir
-popd > /dev/null
+if [ "$building_out_of_srctree" ]; then
+ (
+ cd $srctree
+ for f in $dir_list
+ do find "$f" -name "*.h";
+ done | cpio --quiet -pd $cpio_dir
+ )
+fi
-# The second CPIO can complain if files already exist which can
-# happen with out of tree builds. Just silence CPIO for now.
+# The second CPIO can complain if files already exist which can happen with out
+# of tree builds having stale headers in srctree. Just silence CPIO for now.
for f in $dir_list;
do find "$f" -name "*.h";
done | cpio --quiet -pd $cpio_dir >/dev/null 2>&1
@@ -79,8 +90,7 @@ find $cpio_dir -printf "./%P\n" | LC_ALL=C sort | \
--owner=0 --group=0 --numeric-owner --no-recursion \
-Jcf $tarfile -C $cpio_dir/ -T - > /dev/null
-echo "$src_files_md5" > kernel/kheaders.md5
-echo "$obj_files_md5" >> kernel/kheaders.md5
+echo $headers_md5 > kernel/kheaders.md5
echo "$this_file_md5" >> kernel/kheaders.md5
echo "$(md5sum $tarfile | cut -d ' ' -f1)" >> kernel/kheaders.md5
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index b76703b2c0af..b3fa2d87d2f3 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -1298,6 +1298,50 @@ EXPORT_SYMBOL_GPL(handle_fasteoi_mask_irq);
#endif /* CONFIG_IRQ_FASTEOI_HIERARCHY_HANDLERS */
/**
+ * irq_chip_set_parent_state - set the state of a parent interrupt.
+ *
+ * @data: Pointer to interrupt specific data
+ * @which: State to be restored (one of IRQCHIP_STATE_*)
+ * @val: Value corresponding to @which
+ *
+ * Returns 0 if the parent is absent or lacks the callback, else its result.
+ */
+int irq_chip_set_parent_state(struct irq_data *data,
+ enum irqchip_irq_state which,
+ bool val)
+{
+ data = data->parent_data;
+
+ if (!data || !data->chip->irq_set_irqchip_state)
+ return 0;
+
+ return data->chip->irq_set_irqchip_state(data, which, val);
+}
+EXPORT_SYMBOL_GPL(irq_chip_set_parent_state);
+
+/**
+ * irq_chip_get_parent_state - get the state of a parent interrupt.
+ *
+ * @data: Pointer to interrupt specific data
+ * @which: one of IRQCHIP_STATE_* the caller wants to know
+ * @state: a pointer to a boolean where the state is to be stored
+ *
+ * Returns 0 with @state untouched if the parent is absent or lacks the callback.
+ */
+int irq_chip_get_parent_state(struct irq_data *data,
+ enum irqchip_irq_state which,
+ bool *state)
+{
+ data = data->parent_data;
+
+ if (!data || !data->chip->irq_get_irqchip_state)
+ return 0;
+
+ return data->chip->irq_get_irqchip_state(data, which, state);
+}
+EXPORT_SYMBOL_GPL(irq_chip_get_parent_state);
+
+/**
* irq_chip_enable_parent - Enable the parent interrupt (defaults to unmask if
* NULL)
* @data: Pointer to interrupt specific data
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 9be995fc3c5a..5b8fdd659e54 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -750,7 +750,7 @@ void irq_free_descs(unsigned int from, unsigned int cnt)
EXPORT_SYMBOL_GPL(irq_free_descs);
/**
- * irq_alloc_descs - allocate and initialize a range of irq descriptors
+ * __irq_alloc_descs - allocate and initialize a range of irq descriptors
* @irq: Allocate for specific irq number if irq >= 0
* @from: Start the search from this irq number
* @cnt: Number of consecutive irqs to allocate.
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index d42acaf81886..828cc30774bc 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -29,24 +29,16 @@ static DEFINE_PER_CPU(struct llist_head, lazy_list);
*/
static bool irq_work_claim(struct irq_work *work)
{
- unsigned long flags, oflags, nflags;
+ int oflags;
+ oflags = atomic_fetch_or(IRQ_WORK_CLAIMED, &work->flags);
/*
- * Start with our best wish as a premise but only trust any
- * flag value after cmpxchg() result.
+ * If the work is already pending, no need to raise the IPI.
+ * The pairing atomic_fetch_andnot() in irq_work_run_list() makes sure
+ * everything we did before is visible.
*/
- flags = work->flags & ~IRQ_WORK_PENDING;
- for (;;) {
- nflags = flags | IRQ_WORK_CLAIMED;
- oflags = cmpxchg(&work->flags, flags, nflags);
- if (oflags == flags)
- break;
- if (oflags & IRQ_WORK_PENDING)
- return false;
- flags = oflags;
- cpu_relax();
- }
-
+ if (oflags & IRQ_WORK_PENDING)
+ return false;
return true;
}
@@ -61,7 +53,7 @@ void __weak arch_irq_work_raise(void)
static void __irq_work_queue_local(struct irq_work *work)
{
/* If the work is "lazy", handle it from next tick if any */
- if (work->flags & IRQ_WORK_LAZY) {
+ if (atomic_read(&work->flags) & IRQ_WORK_LAZY) {
if (llist_add(&work->llnode, this_cpu_ptr(&lazy_list)) &&
tick_nohz_tick_stopped())
arch_irq_work_raise();
@@ -143,7 +135,6 @@ static void irq_work_run_list(struct llist_head *list)
{
struct irq_work *work, *tmp;
struct llist_node *llnode;
- unsigned long flags;
BUG_ON(!irqs_disabled());
@@ -152,6 +143,7 @@ static void irq_work_run_list(struct llist_head *list)
llnode = llist_del_all(list);
llist_for_each_entry_safe(work, tmp, llnode, llnode) {
+ int flags;
/*
* Clear the PENDING bit, after this point the @work
* can be re-used.
@@ -159,15 +151,15 @@ static void irq_work_run_list(struct llist_head *list)
* to claim that work don't rely on us to handle their data
* while we are in the middle of the func.
*/
- flags = work->flags & ~IRQ_WORK_PENDING;
- xchg(&work->flags, flags);
+ flags = atomic_fetch_andnot(IRQ_WORK_PENDING, &work->flags);
work->func(work);
/*
* Clear the BUSY bit and return to the free state if
* no-one else claimed it meanwhile.
*/
- (void)cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY);
+ flags &= ~IRQ_WORK_PENDING;
+ (void)atomic_cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY);
}
}
@@ -199,7 +191,7 @@ void irq_work_sync(struct irq_work *work)
{
lockdep_assert_irqs_enabled();
- while (work->flags & IRQ_WORK_BUSY)
+ while (atomic_read(&work->flags) & IRQ_WORK_BUSY)
cpu_relax();
}
EXPORT_SYMBOL_GPL(irq_work_sync);
diff --git a/kernel/kcov.c b/kernel/kcov.c
index 2ee38727844a..f50354202dbe 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -9,6 +9,7 @@
#include <linux/types.h>
#include <linux/file.h>
#include <linux/fs.h>
+#include <linux/hashtable.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/preempt.h>
@@ -21,8 +22,11 @@
#include <linux/uaccess.h>
#include <linux/kcov.h>
#include <linux/refcount.h>
+#include <linux/log2.h>
#include <asm/setup.h>
+#define kcov_debug(fmt, ...) pr_debug("%s: " fmt, __func__, ##__VA_ARGS__)
+
/* Number of 64-bit words written per one comparison: */
#define KCOV_WORDS_PER_CMP 4
@@ -44,19 +48,100 @@ struct kcov {
* Reference counter. We keep one for:
* - opened file descriptor
* - task with enabled coverage (we can't unwire it from another task)
+ * - each code section for remote coverage collection
*/
refcount_t refcount;
/* The lock protects mode, size, area and t. */
spinlock_t lock;
enum kcov_mode mode;
- /* Size of arena (in long's for KCOV_MODE_TRACE). */
- unsigned size;
+ /* Size of arena (in long's). */
+ unsigned int size;
/* Coverage buffer shared with user space. */
void *area;
/* Task for which we collect coverage, or NULL. */
struct task_struct *t;
+ /* Collecting coverage from remote (background) threads. */
+ bool remote;
+ /* Size of remote area (in long's). */
+ unsigned int remote_size;
+ /*
+ * Sequence is incremented each time kcov is reenabled, used by
+ * kcov_remote_stop(), see the comment there.
+ */
+ int sequence;
};
+struct kcov_remote_area {
+ struct list_head list;
+ unsigned int size;
+};
+
+struct kcov_remote {
+ u64 handle;
+ struct kcov *kcov;
+ struct hlist_node hnode;
+};
+
+static DEFINE_SPINLOCK(kcov_remote_lock);
+static DEFINE_HASHTABLE(kcov_remote_map, 4);
+static struct list_head kcov_remote_areas = LIST_HEAD_INIT(kcov_remote_areas);
+
+/* Must be called with kcov_remote_lock locked. */
+static struct kcov_remote *kcov_remote_find(u64 handle)
+{
+ struct kcov_remote *remote;
+
+ hash_for_each_possible(kcov_remote_map, remote, hnode, handle) {
+ if (remote->handle == handle)
+ return remote;
+ }
+ return NULL;
+}
+
+static struct kcov_remote *kcov_remote_add(struct kcov *kcov, u64 handle)
+{
+ struct kcov_remote *remote;
+
+ if (kcov_remote_find(handle))
+ return ERR_PTR(-EEXIST);
+ remote = kmalloc(sizeof(*remote), GFP_ATOMIC);
+ if (!remote)
+ return ERR_PTR(-ENOMEM);
+ remote->handle = handle;
+ remote->kcov = kcov;
+ hash_add(kcov_remote_map, &remote->hnode, handle);
+ return remote;
+}
+
+/* Must be called with kcov_remote_lock locked. */
+static struct kcov_remote_area *kcov_remote_area_get(unsigned int size)
+{
+ struct kcov_remote_area *area;
+ struct list_head *pos;
+
+ kcov_debug("size = %u\n", size);
+ list_for_each(pos, &kcov_remote_areas) {
+ area = list_entry(pos, struct kcov_remote_area, list);
+ if (area->size == size) {
+ list_del(&area->list);
+ kcov_debug("rv = %px\n", area);
+ return area;
+ }
+ }
+ kcov_debug("rv = NULL\n");
+ return NULL;
+}
+
+/* Must be called with kcov_remote_lock locked. */
+static void kcov_remote_area_put(struct kcov_remote_area *area,
+ unsigned int size)
+{
+ kcov_debug("area = %px, size = %u\n", area, size);
+ INIT_LIST_HEAD(&area->list);
+ area->size = size;
+ list_add(&area->list, &kcov_remote_areas);
+}
+
static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_struct *t)
{
unsigned int mode;
@@ -73,7 +158,7 @@ static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_stru
* in_interrupt() returns false (e.g. preempt_schedule_irq()).
* READ_ONCE()/barrier() effectively provides load-acquire wrt
* interrupts, there are paired barrier()/WRITE_ONCE() in
- * kcov_ioctl_locked().
+ * kcov_start().
*/
barrier();
return mode == needed_mode;
@@ -227,6 +312,78 @@ void notrace __sanitizer_cov_trace_switch(u64 val, u64 *cases)
EXPORT_SYMBOL(__sanitizer_cov_trace_switch);
#endif /* ifdef CONFIG_KCOV_ENABLE_COMPARISONS */
+static void kcov_start(struct task_struct *t, unsigned int size,
+ void *area, enum kcov_mode mode, int sequence)
+{
+ kcov_debug("t = %px, size = %u, area = %px\n", t, size, area);
+ /* Cache in task struct for performance. */
+ t->kcov_size = size;
+ t->kcov_area = area;
+ /* See comment in check_kcov_mode(). */
+ barrier();
+ WRITE_ONCE(t->kcov_mode, mode);
+ t->kcov_sequence = sequence;
+}
+
+static void kcov_stop(struct task_struct *t)
+{
+ WRITE_ONCE(t->kcov_mode, KCOV_MODE_DISABLED);
+ barrier();
+ t->kcov_size = 0;
+ t->kcov_area = NULL;
+}
+
+static void kcov_task_reset(struct task_struct *t)
+{
+ kcov_stop(t);
+ t->kcov = NULL;
+ t->kcov_sequence = 0;
+ t->kcov_handle = 0;
+}
+
+void kcov_task_init(struct task_struct *t)
+{
+ kcov_task_reset(t);
+ t->kcov_handle = current->kcov_handle;
+}
+
+static void kcov_reset(struct kcov *kcov)
+{
+ kcov->t = NULL;
+ kcov->mode = KCOV_MODE_INIT;
+ kcov->remote = false;
+ kcov->remote_size = 0;
+ kcov->sequence++;
+}
+
+static void kcov_remote_reset(struct kcov *kcov)
+{
+ int bkt;
+ struct kcov_remote *remote;
+ struct hlist_node *tmp;
+
+ spin_lock(&kcov_remote_lock);
+ hash_for_each_safe(kcov_remote_map, bkt, tmp, remote, hnode) {
+ if (remote->kcov != kcov)
+ continue;
+ kcov_debug("removing handle %llx\n", remote->handle);
+ hash_del(&remote->hnode);
+ kfree(remote);
+ }
+ /* Do reset before unlock to prevent races with kcov_remote_start(). */
+ kcov_reset(kcov);
+ spin_unlock(&kcov_remote_lock);
+}
+
+static void kcov_disable(struct task_struct *t, struct kcov *kcov)
+{
+ kcov_task_reset(t);
+ if (kcov->remote)
+ kcov_remote_reset(kcov);
+ else
+ kcov_reset(kcov);
+}
+
static void kcov_get(struct kcov *kcov)
{
refcount_inc(&kcov->refcount);
@@ -235,20 +392,12 @@ static void kcov_get(struct kcov *kcov)
static void kcov_put(struct kcov *kcov)
{
if (refcount_dec_and_test(&kcov->refcount)) {
+ kcov_remote_reset(kcov);
vfree(kcov->area);
kfree(kcov);
}
}
-void kcov_task_init(struct task_struct *t)
-{
- WRITE_ONCE(t->kcov_mode, KCOV_MODE_DISABLED);
- barrier();
- t->kcov_size = 0;
- t->kcov_area = NULL;
- t->kcov = NULL;
-}
-
void kcov_task_exit(struct task_struct *t)
{
struct kcov *kcov;
@@ -256,15 +405,36 @@ void kcov_task_exit(struct task_struct *t)
kcov = t->kcov;
if (kcov == NULL)
return;
+
spin_lock(&kcov->lock);
+ kcov_debug("t = %px, kcov->t = %px\n", t, kcov->t);
+ /*
+ * For KCOV_ENABLE devices we want to make sure that t->kcov->t == t,
+ * which comes down to:
+ * WARN_ON(!kcov->remote && kcov->t != t);
+ *
+ * For KCOV_REMOTE_ENABLE devices, the exiting task is either:
+ * 1. A remote task between kcov_remote_start() and kcov_remote_stop().
+ * In this case we should print a warning right away, since a task
+ * shouldn't be exiting when it's in a kcov coverage collection
+ * section. Here t points to the task that is collecting remote
+ * coverage, and t->kcov->t points to the thread that created the
+ * kcov device. Which means that to detect this case we need to
+ * check that t != t->kcov->t, and this gives us the following:
+ * WARN_ON(kcov->remote && kcov->t != t);
+ *
+ * 2. The task that created kcov exiting without calling KCOV_DISABLE,
+ * and then again we can make sure that t->kcov->t == t:
+ * WARN_ON(kcov->remote && kcov->t != t);
+ *
+ * By combining all three checks into one we get:
+ */
if (WARN_ON(kcov->t != t)) {
spin_unlock(&kcov->lock);
return;
}
/* Just to not leave dangling references behind. */
- kcov_task_init(t);
- kcov->t = NULL;
- kcov->mode = KCOV_MODE_INIT;
+ kcov_disable(t, kcov);
spin_unlock(&kcov->lock);
kcov_put(kcov);
}
@@ -313,6 +483,7 @@ static int kcov_open(struct inode *inode, struct file *filep)
if (!kcov)
return -ENOMEM;
kcov->mode = KCOV_MODE_DISABLED;
+ kcov->sequence = 1;
refcount_set(&kcov->refcount, 1);
spin_lock_init(&kcov->lock);
filep->private_data = kcov;
@@ -325,6 +496,20 @@ static int kcov_close(struct inode *inode, struct file *filep)
return 0;
}
+static int kcov_get_mode(unsigned long arg)
+{
+ if (arg == KCOV_TRACE_PC)
+ return KCOV_MODE_TRACE_PC;
+ else if (arg == KCOV_TRACE_CMP)
+#ifdef CONFIG_KCOV_ENABLE_COMPARISONS
+ return KCOV_MODE_TRACE_CMP;
+#else
+ return -ENOTSUPP;
+#endif
+ else
+ return -EINVAL;
+}
+
/*
* Fault in a lazily-faulted vmalloc area before it can be used by
* __santizer_cov_trace_pc(), to avoid recursion issues if any code on the
@@ -340,14 +525,35 @@ static void kcov_fault_in_area(struct kcov *kcov)
READ_ONCE(area[offset]);
}
+static inline bool kcov_check_handle(u64 handle, bool common_valid,
+ bool uncommon_valid, bool zero_valid)
+{
+ if (handle & ~(KCOV_SUBSYSTEM_MASK | KCOV_INSTANCE_MASK))
+ return false;
+ switch (handle & KCOV_SUBSYSTEM_MASK) {
+ case KCOV_SUBSYSTEM_COMMON:
+ return (handle & KCOV_INSTANCE_MASK) ?
+ common_valid : zero_valid;
+ case KCOV_SUBSYSTEM_USB:
+ return uncommon_valid;
+ default:
+ return false;
+ }
+ return false;
+}
+
static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd,
unsigned long arg)
{
struct task_struct *t;
unsigned long size, unused;
+ int mode, i;
+ struct kcov_remote_arg *remote_arg;
+ struct kcov_remote *remote;
switch (cmd) {
case KCOV_INIT_TRACE:
+ kcov_debug("KCOV_INIT_TRACE\n");
/*
* Enable kcov in trace mode and setup buffer size.
* Must happen before anything else.
@@ -366,6 +572,7 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd,
kcov->mode = KCOV_MODE_INIT;
return 0;
case KCOV_ENABLE:
+ kcov_debug("KCOV_ENABLE\n");
/*
* Enable coverage for the current task.
* At this point user must have been enabled trace mode,
@@ -378,29 +585,20 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd,
t = current;
if (kcov->t != NULL || t->kcov != NULL)
return -EBUSY;
- if (arg == KCOV_TRACE_PC)
- kcov->mode = KCOV_MODE_TRACE_PC;
- else if (arg == KCOV_TRACE_CMP)
-#ifdef CONFIG_KCOV_ENABLE_COMPARISONS
- kcov->mode = KCOV_MODE_TRACE_CMP;
-#else
- return -ENOTSUPP;
-#endif
- else
- return -EINVAL;
+ mode = kcov_get_mode(arg);
+ if (mode < 0)
+ return mode;
kcov_fault_in_area(kcov);
- /* Cache in task struct for performance. */
- t->kcov_size = kcov->size;
- t->kcov_area = kcov->area;
- /* See comment in check_kcov_mode(). */
- barrier();
- WRITE_ONCE(t->kcov_mode, kcov->mode);
+ kcov->mode = mode;
+ kcov_start(t, kcov->size, kcov->area, kcov->mode,
+ kcov->sequence);
t->kcov = kcov;
kcov->t = t;
- /* This is put either in kcov_task_exit() or in KCOV_DISABLE. */
+ /* Put either in kcov_task_exit() or in KCOV_DISABLE. */
kcov_get(kcov);
return 0;
case KCOV_DISABLE:
+ kcov_debug("KCOV_DISABLE\n");
/* Disable coverage for the current task. */
unused = arg;
if (unused != 0 || current->kcov != kcov)
@@ -408,11 +606,65 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd,
t = current;
if (WARN_ON(kcov->t != t))
return -EINVAL;
- kcov_task_init(t);
- kcov->t = NULL;
- kcov->mode = KCOV_MODE_INIT;
+ kcov_disable(t, kcov);
kcov_put(kcov);
return 0;
+ case KCOV_REMOTE_ENABLE:
+ kcov_debug("KCOV_REMOTE_ENABLE\n");
+ if (kcov->mode != KCOV_MODE_INIT || !kcov->area)
+ return -EINVAL;
+ t = current;
+ if (kcov->t != NULL || t->kcov != NULL)
+ return -EBUSY;
+ remote_arg = (struct kcov_remote_arg *)arg;
+ mode = kcov_get_mode(remote_arg->trace_mode);
+ if (mode < 0)
+ return mode;
+ if (remote_arg->area_size > LONG_MAX / sizeof(unsigned long))
+ return -EINVAL;
+ kcov->mode = mode;
+ t->kcov = kcov;
+ kcov->t = t;
+ kcov->remote = true;
+ kcov->remote_size = remote_arg->area_size;
+ spin_lock(&kcov_remote_lock);
+ for (i = 0; i < remote_arg->num_handles; i++) {
+ kcov_debug("handle %llx\n", remote_arg->handles[i]);
+ if (!kcov_check_handle(remote_arg->handles[i],
+ false, true, false)) {
+ spin_unlock(&kcov_remote_lock);
+ kcov_disable(t, kcov);
+ return -EINVAL;
+ }
+ remote = kcov_remote_add(kcov, remote_arg->handles[i]);
+ if (IS_ERR(remote)) {
+ spin_unlock(&kcov_remote_lock);
+ kcov_disable(t, kcov);
+ return PTR_ERR(remote);
+ }
+ }
+ if (remote_arg->common_handle) {
+ kcov_debug("common handle %llx\n",
+ remote_arg->common_handle);
+ if (!kcov_check_handle(remote_arg->common_handle,
+ true, false, false)) {
+ spin_unlock(&kcov_remote_lock);
+ kcov_disable(t, kcov);
+ return -EINVAL;
+ }
+ remote = kcov_remote_add(kcov,
+ remote_arg->common_handle);
+ if (IS_ERR(remote)) {
+ spin_unlock(&kcov_remote_lock);
+ kcov_disable(t, kcov);
+ return PTR_ERR(remote);
+ }
+ t->kcov_handle = remote_arg->common_handle;
+ }
+ spin_unlock(&kcov_remote_lock);
+ /* Put either in kcov_task_exit() or in KCOV_DISABLE. */
+ kcov_get(kcov);
+ return 0;
default:
return -ENOTTY;
}
@@ -422,11 +674,35 @@ static long kcov_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
{
struct kcov *kcov;
int res;
+ struct kcov_remote_arg *remote_arg = NULL;
+ unsigned int remote_num_handles;
+ unsigned long remote_arg_size;
+
+ if (cmd == KCOV_REMOTE_ENABLE) {
+ if (get_user(remote_num_handles, (unsigned __user *)(arg +
+ offsetof(struct kcov_remote_arg, num_handles))))
+ return -EFAULT;
+ if (remote_num_handles > KCOV_REMOTE_MAX_HANDLES)
+ return -EINVAL;
+ remote_arg_size = struct_size(remote_arg, handles,
+ remote_num_handles);
+ remote_arg = memdup_user((void __user *)arg, remote_arg_size);
+ if (IS_ERR(remote_arg))
+ return PTR_ERR(remote_arg);
+ if (remote_arg->num_handles != remote_num_handles) {
+ kfree(remote_arg);
+ return -EINVAL;
+ }
+ arg = (unsigned long)remote_arg;
+ }
kcov = filep->private_data;
spin_lock(&kcov->lock);
res = kcov_ioctl_locked(kcov, cmd, arg);
spin_unlock(&kcov->lock);
+
+ kfree(remote_arg);
+
return res;
}
@@ -438,6 +714,207 @@ static const struct file_operations kcov_fops = {
.release = kcov_close,
};
+/*
+ * kcov_remote_start() and kcov_remote_stop() can be used to annotate a section
+ * of code in a kernel background thread to allow kcov to be used to collect
+ * coverage from that part of code.
+ *
+ * The handle argument of kcov_remote_start() identifies a code section that is
+ * used for coverage collection. A userspace process passes this handle to
+ * KCOV_REMOTE_ENABLE ioctl to make the used kcov device start collecting
+ * coverage for the code section identified by this handle.
+ *
+ * The usage of these annotations in the kernel code is different depending on
+ * the type of the kernel thread whose code is being annotated.
+ *
+ * For global kernel threads that are spawned in a limited number of instances
+ * (e.g. one USB hub_event() worker thread is spawned per USB HCD), each
+ * instance must be assigned a unique 4-byte instance id. The instance id is
+ * then combined with a 1-byte subsystem id to get a handle via
+ * kcov_remote_handle(subsystem_id, instance_id).
+ *
+ * For local kernel threads that are spawned from system calls handler when a
+ * user interacts with some kernel interface (e.g. vhost workers), a handle is
+ * passed from a userspace process as the common_handle field of the
+ * kcov_remote_arg struct (note, that the user must generate a handle by using
+ * kcov_remote_handle() with KCOV_SUBSYSTEM_COMMON as the subsystem id and an
+ * arbitrary 4-byte non-zero number as the instance id). This common handle
+ * then gets saved into the task_struct of the process that issued the
+ * KCOV_REMOTE_ENABLE ioctl. When this process issues system calls that spawn
+ * kernel threads, the common handle must be retrieved via kcov_common_handle()
+ * and passed to the spawned threads via custom annotations. Those kernel
+ * threads must in turn be annotated with kcov_remote_start(common_handle) and
+ * kcov_remote_stop(). All of the threads that are spawned by the same process
+ * obtain the same handle, hence the name "common".
+ *
+ * See Documentation/dev-tools/kcov.rst for more details.
+ *
+ * Internally, this function looks up the kcov device associated with the
+ * provided handle, allocates an area for coverage collection, and saves the
+ * pointers to kcov and area into the current task_struct to allow coverage to
+ * be collected via __sanitizer_cov_trace_pc().
+ * In turn, kcov_remote_stop() clears those pointers from task_struct to stop
+ * collecting coverage and copies all collected coverage into the kcov area.
+ */
+void kcov_remote_start(u64 handle)
+{
+ struct kcov_remote *remote;
+ void *area;
+ struct task_struct *t;
+ unsigned int size;
+ enum kcov_mode mode;
+ int sequence;
+
+ if (WARN_ON(!kcov_check_handle(handle, true, true, true)))
+ return;
+ if (WARN_ON(!in_task()))
+ return;
+ t = current;
+ /*
+ * Check that kcov_remote_start is not called twice
+ * nor called by user tasks (with enabled kcov).
+ */
+ if (WARN_ON(t->kcov))
+ return;
+
+ kcov_debug("handle = %llx\n", handle);
+
+ spin_lock(&kcov_remote_lock);
+ remote = kcov_remote_find(handle);
+ if (!remote) {
+ kcov_debug("no remote found");
+ spin_unlock(&kcov_remote_lock);
+ return;
+ }
+ /* Put in kcov_remote_stop(). */
+ kcov_get(remote->kcov);
+ t->kcov = remote->kcov;
+ /*
+ * Read kcov fields before unlock to prevent races with
+ * KCOV_DISABLE / kcov_remote_reset().
+ */
+ size = remote->kcov->remote_size;
+ mode = remote->kcov->mode;
+ sequence = remote->kcov->sequence;
+ area = kcov_remote_area_get(size);
+ spin_unlock(&kcov_remote_lock);
+
+ if (!area) {
+ area = vmalloc(size * sizeof(unsigned long));
+ if (!area) {
+ t->kcov = NULL;
+ kcov_put(remote->kcov);
+ return;
+ }
+ }
+ /* Reset coverage size. */
+ *(u64 *)area = 0;
+
+ kcov_debug("area = %px, size = %u", area, size);
+
+ kcov_start(t, size, area, mode, sequence);
+
+}
+EXPORT_SYMBOL(kcov_remote_start);
+
+static void kcov_move_area(enum kcov_mode mode, void *dst_area,
+ unsigned int dst_area_size, void *src_area)
+{
+ u64 word_size = sizeof(unsigned long);
+ u64 count_size, entry_size_log;
+ u64 dst_len, src_len;
+ void *dst_entries, *src_entries;
+ u64 dst_occupied, dst_free, bytes_to_move, entries_moved;
+
+ kcov_debug("%px %u <= %px %lu\n",
+ dst_area, dst_area_size, src_area, *(unsigned long *)src_area);
+
+ switch (mode) {
+ case KCOV_MODE_TRACE_PC:
+ dst_len = READ_ONCE(*(unsigned long *)dst_area);
+ src_len = *(unsigned long *)src_area;
+ count_size = sizeof(unsigned long);
+ entry_size_log = __ilog2_u64(sizeof(unsigned long));
+ break;
+ case KCOV_MODE_TRACE_CMP:
+ dst_len = READ_ONCE(*(u64 *)dst_area);
+ src_len = *(u64 *)src_area;
+ count_size = sizeof(u64);
+ BUILD_BUG_ON(!is_power_of_2(KCOV_WORDS_PER_CMP));
+ entry_size_log = __ilog2_u64(sizeof(u64) * KCOV_WORDS_PER_CMP);
+ break;
+ default:
+ WARN_ON(1);
+ return;
+ }
+
+ /* As arm can't divide u64 integers use log of entry size. */
+ if (dst_len > ((dst_area_size * word_size - count_size) >>
+ entry_size_log))
+ return;
+ dst_occupied = count_size + (dst_len << entry_size_log);
+ dst_free = dst_area_size * word_size - dst_occupied;
+ bytes_to_move = min(dst_free, src_len << entry_size_log);
+ dst_entries = dst_area + dst_occupied;
+ src_entries = src_area + count_size;
+ memcpy(dst_entries, src_entries, bytes_to_move);
+ entries_moved = bytes_to_move >> entry_size_log;
+
+ switch (mode) {
+ case KCOV_MODE_TRACE_PC:
+ WRITE_ONCE(*(unsigned long *)dst_area, dst_len + entries_moved);
+ break;
+ case KCOV_MODE_TRACE_CMP:
+ WRITE_ONCE(*(u64 *)dst_area, dst_len + entries_moved);
+ break;
+ default:
+ break;
+ }
+}
+
+/* See the comment before kcov_remote_start() for usage details. */
+void kcov_remote_stop(void)
+{
+ struct task_struct *t = current;
+ struct kcov *kcov = t->kcov;
+ void *area = t->kcov_area;
+ unsigned int size = t->kcov_size;
+ int sequence = t->kcov_sequence;
+
+ if (!kcov) {
+ kcov_debug("no kcov found\n");
+ return;
+ }
+
+ kcov_stop(t);
+ t->kcov = NULL;
+
+ spin_lock(&kcov->lock);
+ /*
+ * KCOV_DISABLE could have been called between kcov_remote_start()
+ * and kcov_remote_stop(), hence the check.
+ */
+ kcov_debug("move if: %d == %d && %d\n",
+ sequence, kcov->sequence, (int)kcov->remote);
+ if (sequence == kcov->sequence && kcov->remote)
+ kcov_move_area(kcov->mode, kcov->area, kcov->size, area);
+ spin_unlock(&kcov->lock);
+
+ spin_lock(&kcov_remote_lock);
+ kcov_remote_area_put(area, size);
+ spin_unlock(&kcov_remote_lock);
+
+ kcov_put(kcov);
+}
+EXPORT_SYMBOL(kcov_remote_stop);
+
+/* See the comment before kcov_remote_start() for usage details. */
+u64 kcov_common_handle(void)
+{
+ return current->kcov_handle;
+}
+EXPORT_SYMBOL(kcov_common_handle);
+
static int __init kcov_init(void)
{
/*
diff --git a/kernel/module.c b/kernel/module.c
index 052a40212b8e..3a486f826224 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1033,6 +1033,8 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module));
free_module(mod);
+ /* someone could wait for the module in add_unformed_module() */
+ wake_up_all(&module_wq);
return 0;
out:
mutex_unlock(&module_mutex);
@@ -1400,7 +1402,7 @@ static int verify_namespace_is_imported(const struct load_info *info,
char *imported_namespace;
namespace = kernel_symbol_namespace(sym);
- if (namespace) {
+ if (namespace && namespace[0]) {
imported_namespace = get_modinfo(info, "import_ns");
while (imported_namespace) {
if (strcmp(namespace, imported_namespace) == 0)
diff --git a/kernel/notifier.c b/kernel/notifier.c
index d9f5081d578d..63d7501ac638 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -23,22 +23,10 @@ static int notifier_chain_register(struct notifier_block **nl,
struct notifier_block *n)
{
while ((*nl) != NULL) {
- WARN_ONCE(((*nl) == n), "double register detected");
- if (n->priority > (*nl)->priority)
- break;
- nl = &((*nl)->next);
- }
- n->next = *nl;
- rcu_assign_pointer(*nl, n);
- return 0;
-}
-
-static int notifier_chain_cond_register(struct notifier_block **nl,
- struct notifier_block *n)
-{
- while ((*nl) != NULL) {
- if ((*nl) == n)
+ if (unlikely((*nl) == n)) {
+ WARN(1, "double register detected");
return 0;
+ }
if (n->priority > (*nl)->priority)
break;
nl = &((*nl)->next);
@@ -233,29 +221,6 @@ int blocking_notifier_chain_register(struct blocking_notifier_head *nh,
EXPORT_SYMBOL_GPL(blocking_notifier_chain_register);
/**
- * blocking_notifier_chain_cond_register - Cond add notifier to a blocking notifier chain
- * @nh: Pointer to head of the blocking notifier chain
- * @n: New entry in notifier chain
- *
- * Adds a notifier to a blocking notifier chain, only if not already
- * present in the chain.
- * Must be called in process context.
- *
- * Currently always returns zero.
- */
-int blocking_notifier_chain_cond_register(struct blocking_notifier_head *nh,
- struct notifier_block *n)
-{
- int ret;
-
- down_write(&nh->rwsem);
- ret = notifier_chain_cond_register(&nh->head, n);
- up_write(&nh->rwsem);
- return ret;
-}
-EXPORT_SYMBOL_GPL(blocking_notifier_chain_cond_register);
-
-/**
* blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain
* @nh: Pointer to head of the blocking notifier chain
* @n: Entry to remove from notifier chain
diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index a45cba7df0ae..83edf8698118 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -714,8 +714,10 @@ s32 freq_qos_read_value(struct freq_constraints *qos,
* @req: Constraint request to apply.
* @action: Action to perform (add/update/remove).
* @value: Value to assign to the QoS request.
+ *
+ * This is only meant to be called from inside pm_qos, not drivers.
*/
-static int freq_qos_apply(struct freq_qos_request *req,
+int freq_qos_apply(struct freq_qos_request *req,
enum pm_qos_req_action action, s32 value)
{
int ret;
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index c8be5a0f5259..1ef6f75d92f1 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -2961,7 +2961,7 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work)
static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = {
.func = wake_up_klogd_work_func,
- .flags = IRQ_WORK_LAZY,
+ .flags = ATOMIC_INIT(IRQ_WORK_LAZY),
};
void wake_up_klogd(void)
diff --git a/kernel/profile.c b/kernel/profile.c
index af7c94bf5fa1..4b144b02ca5d 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -336,7 +336,7 @@ static int profile_dead_cpu(unsigned int cpu)
struct page *page;
int i;
- if (prof_cpu_mask != NULL)
+ if (cpumask_available(prof_cpu_mask))
cpumask_clear_cpu(cpu, prof_cpu_mask);
for (i = 0; i < 2; i++) {
@@ -373,7 +373,7 @@ static int profile_prepare_cpu(unsigned int cpu)
static int profile_online_cpu(unsigned int cpu)
{
- if (prof_cpu_mask != NULL)
+ if (cpumask_available(prof_cpu_mask))
cpumask_set_cpu(cpu, prof_cpu_mask);
return 0;
@@ -403,7 +403,7 @@ void profile_tick(int type)
{
struct pt_regs *regs = get_irq_regs();
- if (!user_mode(regs) && prof_cpu_mask != NULL &&
+ if (!user_mode(regs) && cpumask_available(prof_cpu_mask) &&
cpumask_test_cpu(smp_processor_id(), prof_cpu_mask))
profile_hit(type, (void *)profile_pc(regs));
}
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 86800b4d5453..322ca8860f54 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -915,7 +915,7 @@ static int __init sugov_register(void)
{
return cpufreq_register_governor(&schedutil_gov);
}
-fs_initcall(sugov_register);
+core_initcall(sugov_register);
#ifdef CONFIG_ENERGY_MODEL
extern bool sched_energy_update;
diff --git a/kernel/sys.c b/kernel/sys.c
index d3aef31e24dc..a9331f101883 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1279,11 +1279,13 @@ SYSCALL_DEFINE1(uname, struct old_utsname __user *, name)
SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name)
{
- struct oldold_utsname tmp = {};
+ struct oldold_utsname tmp;
if (!name)
return -EFAULT;
+ memset(&tmp, 0, sizeof(tmp));
+
down_read(&uts_sem);
memcpy(&tmp.sysname, &utsname()->sysname, __OLD_UTS_LEN);
memcpy(&tmp.nodename, &utsname()->nodename, __OLD_UTS_LEN);
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 9e20873148c6..8de90ea31280 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -966,7 +966,8 @@ static int enqueue_hrtimer(struct hrtimer *timer,
base->cpu_base->active_bases |= 1 << base->index;
- timer->state = HRTIMER_STATE_ENQUEUED;
+ /* Pairs with the lockless read in hrtimer_is_queued() */
+ WRITE_ONCE(timer->state, HRTIMER_STATE_ENQUEUED);
return timerqueue_add(&base->active, &timer->node);
}
@@ -988,7 +989,8 @@ static void __remove_hrtimer(struct hrtimer *timer,
struct hrtimer_cpu_base *cpu_base = base->cpu_base;
u8 state = timer->state;
- timer->state = newstate;
+ /* Pairs with the lockless read in hrtimer_is_queued() */
+ WRITE_ONCE(timer->state, newstate);
if (!(state & HRTIMER_STATE_ENQUEUED))
return;
@@ -1013,8 +1015,9 @@ static void __remove_hrtimer(struct hrtimer *timer,
static inline int
remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool restart)
{
- if (hrtimer_is_queued(timer)) {
- u8 state = timer->state;
+ u8 state = timer->state;
+
+ if (state & HRTIMER_STATE_ENQUEUED) {
int reprogram;
/*
diff --git a/kernel/time/time.c b/kernel/time/time.c
index 58e312e7380f..704ccd9451b0 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -179,7 +179,7 @@ int do_sys_settimeofday64(const struct timespec64 *tv, const struct timezone *tz
return error;
if (tz) {
- /* Verify we're witin the +-15 hrs range */
+ /* Verify we're within the +-15 hrs range */
if (tz->tz_minuteswest > 15*60 || tz->tz_minuteswest < -15*60)
return -EINVAL;
@@ -548,18 +548,21 @@ EXPORT_SYMBOL(set_normalized_timespec64);
*/
struct timespec64 ns_to_timespec64(const s64 nsec)
{
- struct timespec64 ts;
+ struct timespec64 ts = { 0, 0 };
s32 rem;
- if (!nsec)
- return (struct timespec64) {0, 0};
-
- ts.tv_sec = div_s64_rem(nsec, NSEC_PER_SEC, &rem);
- if (unlikely(rem < 0)) {
- ts.tv_sec--;
- rem += NSEC_PER_SEC;
+ if (likely(nsec > 0)) {
+ ts.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem);
+ ts.tv_nsec = rem;
+ } else if (nsec < 0) {
+ /*
+ * With negative times, tv_sec points to the earlier
+ * second, and tv_nsec counts the nanoseconds since
+ * then, so tv_nsec is always a positive number.
+ */
+ ts.tv_sec = -div_u64_rem(-nsec - 1, NSEC_PER_SEC, &rem) - 1;
+ ts.tv_nsec = NSEC_PER_SEC - rem - 1;
}
- ts.tv_nsec = rem;
return ts;
}
@@ -878,10 +881,11 @@ int get_timespec64(struct timespec64 *ts,
ts->tv_sec = kts.tv_sec;
- /* Zero out the padding for 32 bit systems or in compat mode */
+ /* Zero out the padding in compat mode */
if (in_compat_syscall())
kts.tv_nsec &= 0xFFFFFFFFUL;
+ /* In 32-bit mode, this drops the padding */
ts->tv_nsec = kts.tv_nsec;
return 0;
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index cdf5afa87f65..25a0fcfa7a5d 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -671,6 +671,15 @@ config HIST_TRIGGERS
See Documentation/trace/histogram.rst.
If in doubt, say N.
+config TRACE_EVENT_INJECT
+ bool "Trace event injection"
+ depends on TRACING
+ help
+ Allow user-space to inject a specific trace event into the ring
+ buffer. This is mainly used for testing purposes.
+
+ If unsure, say N.
+
config MMIOTRACE_TEST
tristate "Test module for mmiotrace"
depends on MMIOTRACE && m
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index c2b2148bb1d2..0e63db62225f 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -69,6 +69,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
endif
obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o
+obj-$(CONFIG_TRACE_EVENT_INJECT) += trace_events_inject.o
obj-$(CONFIG_HIST_TRIGGERS) += trace_events_hist.o
obj-$(CONFIG_BPF_EVENTS) += bpf_trace.o
obj-$(CONFIG_KPROBE_EVENTS) += trace_kprobe.o
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index ffc91d4935ac..e5ef4ae9edb5 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -739,7 +739,7 @@ BPF_CALL_1(bpf_send_signal, u32, sig)
return -EINVAL;
work = this_cpu_ptr(&send_signal_work);
- if (work->irq_work.flags & IRQ_WORK_BUSY)
+ if (atomic_read(&work->irq_work.flags) & IRQ_WORK_BUSY)
return -EBUSY;
/* Add the current task, which is the target of sending signal,
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 66358d66c933..4bf050fcfe3b 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -11,6 +11,7 @@
#include <linux/trace_seq.h>
#include <linux/spinlock.h>
#include <linux/irq_work.h>
+#include <linux/security.h>
#include <linux/uaccess.h>
#include <linux/hardirq.h>
#include <linux/kthread.h> /* for self test */
@@ -5068,6 +5069,11 @@ static __init int test_ringbuffer(void)
int cpu;
int ret = 0;
+ if (security_locked_down(LOCKDOWN_TRACEFS)) {
+ pr_warning("Lockdown is enabled, skipping ring buffer tests\n");
+ return 0;
+ }
+
pr_info("Running ring buffer tests...\n");
buffer = ring_buffer_alloc(RB_TEST_BUFFER_SIZE, RB_FL_OVERWRITE);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 02a23a6e5e00..23459d53d576 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1888,6 +1888,12 @@ int __init register_tracer(struct tracer *type)
return -1;
}
+ if (security_locked_down(LOCKDOWN_TRACEFS)) {
+ pr_warning("Can not register tracer %s due to lockdown\n",
+ type->name);
+ return -EPERM;
+ }
+
mutex_lock(&trace_types_lock);
tracing_selftest_running = true;
@@ -8789,6 +8795,11 @@ struct dentry *tracing_init_dentry(void)
{
struct trace_array *tr = &global_trace;
+ if (security_locked_down(LOCKDOWN_TRACEFS)) {
+ pr_warning("Tracing disabled due to lockdown\n");
+ return ERR_PTR(-EPERM);
+ }
+
/* The top level trace array uses NULL as parent */
if (tr->dir)
return NULL;
@@ -9231,6 +9242,12 @@ __init static int tracer_alloc_buffers(void)
int ring_buf_size;
int ret = -ENOMEM;
+
+ if (security_locked_down(LOCKDOWN_TRACEFS)) {
+ pr_warning("Tracing disabled due to lockdown\n");
+ return -EPERM;
+ }
+
/*
* Make sure we don't accidently add more trace options
* than we have bits for.
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index ca7fccafbcbb..63bf60f79398 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1601,6 +1601,7 @@ extern struct list_head ftrace_events;
extern const struct file_operations event_trigger_fops;
extern const struct file_operations event_hist_fops;
+extern const struct file_operations event_inject_fops;
#ifdef CONFIG_HIST_TRIGGERS
extern int register_trigger_hist_cmd(void);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 6b3a69e9aa6a..c6de3cebc127 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2044,6 +2044,12 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file)
trace_create_file("format", 0444, file->dir, call,
&ftrace_event_format_fops);
+#ifdef CONFIG_TRACE_EVENT_INJECT
+ if (call->event.type && call->class->reg)
+ trace_create_file("inject", 0200, file->dir, file,
+ &event_inject_fops);
+#endif
+
return 0;
}
diff --git a/kernel/trace/trace_events_inject.c b/kernel/trace/trace_events_inject.c
new file mode 100644
index 000000000000..d43710718ee5
--- /dev/null
+++ b/kernel/trace/trace_events_inject.c
@@ -0,0 +1,331 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * trace_events_inject - trace event injection
+ *
+ * Copyright (C) 2019 Cong Wang <cwang@twitter.com>
+ */
+
+#include <linux/module.h>
+#include <linux/ctype.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/rculist.h>
+
+#include "trace.h"
+
+/*
+ * Copy a prepared event record into the trace buffer of @file.
+ *
+ * @file: event file the record is injected for
+ * @rec:  record to copy
+ * @len:  number of bytes to reserve and to copy from @rec
+ *
+ * Space is reserved with trace_event_buffer_reserve() and published with
+ * trace_event_buffer_commit(), all inside an rcu_read_lock_sched() section.
+ *
+ * Returns @len when the record was committed, or 0 when no entry could be
+ * reserved.
+ */
+static int
+trace_inject_entry(struct trace_event_file *file, void *rec, int len)
+{
+	struct trace_event_buffer fbuffer;
+	int written = 0;
+	void *entry;
+
+	rcu_read_lock_sched();
+	entry = trace_event_buffer_reserve(&fbuffer, file, len);
+	if (entry) {
+		memcpy(entry, rec, len);
+		written = len;
+		trace_event_buffer_commit(&fbuffer);
+	}
+	rcu_read_unlock_sched();
+
+	return written;
+}
+
+/*
+ * Parse one "field = value" assignment from the start of @str.
+ *
+ * @str:  NUL-terminated input; modified in place (the closing quote of a
+ *        string value is overwritten with '\0')
+ * @call: event whose fields are looked up with trace_find_event_field()
+ * @pf:   set to the matched field as soon as the name is resolved, so it
+ *        may be written even when an error is returned afterwards
+ * @pv:   set to the parsed number, or to the address (inside @str) of the
+ *        string contents without the surrounding quotes
+ *
+ * Returns the number of characters consumed from @str, 0 when @str is
+ * empty, or a negative errno: -EINVAL for malformed input or a value kind
+ * that does not match the field, -ENOMEM when the name copy cannot be
+ * allocated, -ENOENT when no field has that name, or the error returned by
+ * kstrtoll()/kstrtoull().
+ */
+static int
+parse_field(char *str, struct trace_event_call *call,
+ struct ftrace_event_field **pf, u64 *pv)
+{
+ struct ftrace_event_field *field;
+ char *field_name;
+ int s, i = 0;
+ int len;
+ u64 val;
+
+ if (!str[i])
+ return 0;
+ /* First find the field to associate to */
+ while (isspace(str[i]))
+ i++;
+ s = i;
+ while (isalnum(str[i]) || str[i] == '_')
+ i++;
+ len = i - s;
+ if (!len)
+ return -EINVAL;
+
+ /* The lookup needs a NUL-terminated name; use a temporary copy. */
+ field_name = kmemdup_nul(str + s, len, GFP_KERNEL);
+ if (!field_name)
+ return -ENOMEM;
+ field = trace_find_event_field(call, field_name);
+ kfree(field_name);
+ if (!field)
+ return -ENOENT;
+
+ *pf = field;
+ /* Expect '=' next, with optional whitespace on either side. */
+ while (isspace(str[i]))
+ i++;
+ if (str[i] != '=')
+ return -EINVAL;
+ i++;
+ while (isspace(str[i]))
+ i++;
+ s = i;
+ if (isdigit(str[i]) || str[i] == '-') {
+ char *num, c;
+ int ret;
+
+ /* Make sure the field is not a string */
+ if (is_string_field(field))
+ return -EINVAL;
+
+ if (str[i] == '-')
+ i++;
+
+ /* We allow 0xDEADBEEF */
+ while (isalnum(str[i]))
+ i++;
+ num = str + s;
+ c = str[i];
+ /* The number must be followed by whitespace or the end of input. */
+ if (c != '\0' && !isspace(c))
+ return -EINVAL;
+ /* Terminate the number temporarily; the saved char is restored below. */
+ str[i] = '\0';
+ /* Make sure it is a value */
+ if (field->is_signed)
+ ret = kstrtoll(num, 0, &val);
+ else
+ ret = kstrtoull(num, 0, &val);
+ str[i] = c;
+ if (ret)
+ return ret;
+
+ *pv = val;
+ return i;
+ } else if (str[i] == '\'' || str[i] == '"') {
+ char q = str[i];
+
+ /* Make sure the field is OK for strings */
+ if (!is_string_field(field))
+ return -EINVAL;
+
+ /*
+ * Look for the matching closing quote. A backslash skips the next
+ * character so an escaped quote does not end the value; the
+ * backslash itself is left in the string.
+ */
+ for (i++; str[i]; i++) {
+ if (str[i] == '\\' && str[i + 1]) {
+ i++;
+ continue;
+ }
+ if (str[i] == q)
+ break;
+ }
+ /* Ran off the end of the input without a closing quote. */
+ if (!str[i])
+ return -EINVAL;
+
+ /* Skip quotes */
+ s++;
+ len = i - s;
+ if (len >= MAX_FILTER_STR_VAL)
+ return -EINVAL;
+
+ /* Pass the string back as an address carried in the u64 value. */
+ *pv = (unsigned long)(str + s);
+ str[i] = 0;
+ /* go past the last quote */
+ i++;
+ return i;
+ }
+
+ /* The value is neither a number nor a quoted string. */
+ return -EINVAL;
+}
+
+/*
+ * Compute the record size for @call: the largest (size + offset) over all
+ * fields returned by trace_get_fields(), or 0 when the list is empty.
+ */
+static int trace_get_entry_size(struct trace_event_call *call)
+{
+	struct list_head *fields = trace_get_fields(call);
+	struct ftrace_event_field *field;
+	int entry_end = 0;
+
+	list_for_each_entry(field, fields, link) {
+		/* Only a field reaching past the current end grows the record. */
+		if (field->size + field->offset <= entry_end)
+			continue;
+		entry_end = field->size + field->offset;
+	}
+
+	return entry_end;
+}
+
+/*
+ * Allocate a zeroed record for @call and preset its string fields.
+ *
+ * @call: event the record is allocated for
+ * @size: on success, set to the allocated size (entry size plus one byte)
+ *
+ * Dynamic string fields are pointed at the spare trailing byte with a
+ * length of 0; string fields that are neither static nor dynamic get a
+ * pointer to "". Static string fields stay zeroed.
+ *
+ * Returns the record, or NULL when the allocation fails (in which case
+ * *size is left untouched).
+ */
+static void *trace_alloc_entry(struct trace_event_call *call, int *size)
+{
+	int entry_size = trace_get_entry_size(call);
+	struct ftrace_event_field *field;
+	struct list_head *fields;
+	void *rec;
+
+	/* We need an extra '\0' at the end. */
+	rec = kzalloc(entry_size + 1, GFP_KERNEL);
+	if (!rec)
+		return NULL;
+
+	fields = trace_get_fields(call);
+	list_for_each_entry(field, fields, link) {
+		if (!is_string_field(field))
+			continue;
+
+		switch (field->filter_type) {
+		case FILTER_STATIC_STRING:
+			/* Already an empty string thanks to kzalloc(). */
+			break;
+		case FILTER_DYN_STRING: {
+			u32 *str_item = (u32 *)(rec + field->offset);
+
+			/* string length is 0. */
+			*str_item = entry_size & 0xffff;
+			break;
+		}
+		default: {
+			char **paddr = (char **)(rec + field->offset);
+
+			*paddr = "";
+			break;
+		}
+		}
+	}
+
+	*size = entry_size + 1;
+	return rec;
+}
+
+#define INJECT_STRING "STATIC STRING CAN NOT BE INJECTED"
+
+/*
+ * Build an event record for @call from a string of "field=value" pairs.
+ *
+ * @str:    NUL-terminated assignments; parse_field() modifies it in place
+ * @call:   event the record is built for
+ * @pentry: receives the allocated record; it can be non-NULL even when an
+ *          error is returned, and is NULL after an allocation failure
+ *
+ * Numeric values are stored with the width given by the field size
+ * (1, 2, 4 or 8 bytes). Static strings are copied into the record, dynamic
+ * strings are appended to the end of the record (which is reallocated),
+ * and other string fields are pointed at INJECT_STRING.
+ *
+ * Returns the record size in bytes, or a negative errno: -ENOMEM on
+ * allocation failure, -EINVAL for a function field or an unsupported
+ * numeric field size, or the error returned by parse_field().
+ *
+ * Caller is responsible to free the *pentry.
+ */
+static int parse_entry(char *str, struct trace_event_call *call, void **pentry)
+{
+	struct ftrace_event_field *field;
+	unsigned long irq_flags;
+	void *entry = NULL;
+	int entry_size;
+	/*
+	 * parse_field() stores a value before every positive return, but the
+	 * compiler cannot see that; start from a defined value.
+	 */
+	u64 val = 0;
+	int len;
+
+	entry = trace_alloc_entry(call, &entry_size);
+	*pentry = entry;
+	if (!entry)
+		return -ENOMEM;
+
+	local_save_flags(irq_flags);
+	tracing_generic_entry_update(entry, call->event.type, irq_flags,
+				     preempt_count());
+
+	while ((len = parse_field(str, call, &field, &val)) > 0) {
+		if (is_function_field(field))
+			return -EINVAL;
+
+		if (is_string_field(field)) {
+			/* For strings, val carries the address of the text. */
+			char *addr = (char *)(unsigned long) val;
+
+			if (field->filter_type == FILTER_STATIC_STRING) {
+				strlcpy(entry + field->offset, addr, field->size);
+			} else if (field->filter_type == FILTER_DYN_STRING) {
+				int str_len = strlen(addr) + 1;
+				int str_loc = entry_size & 0xffff;
+				u32 *str_item;
+
+				/* Grow the record and append the string at its end. */
+				entry_size += str_len;
+				*pentry = krealloc(entry, entry_size, GFP_KERNEL);
+				if (!*pentry) {
+					/* The old record is still ours; release it. */
+					kfree(entry);
+					return -ENOMEM;
+				}
+				entry = *pentry;
+
+				strlcpy(entry + (entry_size - str_len), addr, str_len);
+				/* Upper 16 bits: length, lower 16 bits: location. */
+				str_item = (u32 *)(entry + field->offset);
+				*str_item = (str_len << 16) | str_loc;
+			} else {
+				char **paddr;
+
+				paddr = (char **)(entry + field->offset);
+				*paddr = INJECT_STRING;
+			}
+		} else {
+			/* Store the number truncated to the field's width. */
+			switch (field->size) {
+			case 1: {
+				u8 tmp = (u8) val;
+
+				memcpy(entry + field->offset, &tmp, 1);
+				break;
+			}
+			case 2: {
+				u16 tmp = (u16) val;
+
+				memcpy(entry + field->offset, &tmp, 2);
+				break;
+			}
+			case 4: {
+				u32 tmp = (u32) val;
+
+				memcpy(entry + field->offset, &tmp, 4);
+				break;
+			}
+			case 8:
+				memcpy(entry + field->offset, &val, 8);
+				break;
+			default:
+				return -EINVAL;
+			}
+		}
+
+		str += len;
+	}
+
+	if (len < 0)
+		return len;
+
+	return entry_size;
+}
+
+/*
+ * Write handler of event_inject_fops: parse the user-supplied
+ * "field=value" text and inject the resulting record.
+ *
+ * Writes of PAGE_SIZE bytes or more are rejected with -EINVAL. The text is
+ * copied from user space, trimmed, and then parsed and injected while
+ * holding event_mutex. Returns @cnt on success, a negative errno on
+ * failure, and -ENODEV when event_file_data() yields no event file.
+ */
+static ssize_t
+event_inject_write(struct file *filp, const char __user *ubuf, size_t cnt,
+		   loff_t *ppos)
+{
+	struct trace_event_file *file;
+	void *entry = NULL;
+	int err = -ENODEV;
+	int size;
+	char *buf;
+
+	if (cnt >= PAGE_SIZE)
+		return -EINVAL;
+
+	buf = memdup_user_nul(ubuf, cnt);
+	if (IS_ERR(buf))
+		return PTR_ERR(buf);
+	strim(buf);
+
+	mutex_lock(&event_mutex);
+	file = event_file_data(filp);
+	if (!file)
+		goto unlock;
+
+	size = parse_entry(buf, file->event_call, &entry);
+	err = size < 0 ? size : trace_inject_entry(file, entry, size);
+unlock:
+	mutex_unlock(&event_mutex);
+
+	/* entry may be set even when parsing failed; kfree(NULL) is fine. */
+	kfree(entry);
+	kfree(buf);
+
+	if (err < 0)
+		return err;
+
+	*ppos += err;
+	return cnt;
+}
+
+/*
+ * Read handler of event_inject_fops: reading is not supported, so every
+ * call fails with -EPERM and none of the arguments are used.
+ */
+static ssize_t
+event_inject_read(struct file *file, char __user *buf, size_t size,
+ loff_t *ppos)
+{
+ return -EPERM;
+}
+
+/*
+ * File operations for event injection: opened through
+ * tracing_open_generic(), writes are handled by event_inject_write() and
+ * reads are refused by event_inject_read().
+ */
+const struct file_operations event_inject_fops = {
+ .open = tracing_open_generic,
+ .read = event_inject_read,
+ .write = event_inject_write,
+};