author     Peter Zijlstra <peterz@infradead.org>  2020-07-02 15:51:48 +0200
committer  Peter Zijlstra <peterz@infradead.org>  2020-07-02 15:51:48 +0200
commit     78c2141b654375079e3737f06f19cabfc0fcecd7 (patch)
tree       5068a6c2904d93b61b37243ea39b728fe621196e /kernel
parent     perf/x86/intel/uncore: Expose an Uncore unit to IIO PMON mapping (diff)
parent     perf/x86: Keep LBR records unchanged in host context for guest usage (diff)
Merge branch 'perf/vlbr'
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/bpf/cgroup.c                 |  53
-rw-r--r--  kernel/bpf/devmap.c                 |  10
-rw-r--r--  kernel/debug/debug_core.c           |  10
-rw-r--r--  kernel/debug/gdbstub.c              |   6
-rw-r--r--  kernel/debug/kdb/kdb_io.c           |  72
-rw-r--r--  kernel/debug/kdb/kdb_main.c         |   3
-rw-r--r--  kernel/debug/kdb/kdb_support.c      |   7
-rw-r--r--  kernel/dma/Kconfig                  |  11
-rw-r--r--  kernel/dma/direct.c                 |  59
-rw-r--r--  kernel/dma/pool.c                   |   7
-rw-r--r--  kernel/dma/remap.c                  |   5
-rw-r--r--  kernel/kexec_file.c                 |  34
-rw-r--r--  kernel/kprobes.c                    |  61
-rw-r--r--  kernel/kthread.c                    |   2
-rw-r--r--  kernel/module.c                     |   4
-rw-r--r--  kernel/nsproxy.c                    |   2
-rw-r--r--  kernel/printk/printk.c              |  10
-rw-r--r--  kernel/rcu/tree.c                   |  32
-rw-r--r--  kernel/sched/core.c                 |  44
-rw-r--r--  kernel/sched/deadline.c             |   1
-rw-r--r--  kernel/sched/fair.c                 |   2
-rw-r--r--  kernel/sched/idle.c                 |  15
-rw-r--r--  kernel/sched/sched.h                |   2
-rw-r--r--  kernel/smp.c                        |  18
-rw-r--r--  kernel/trace/blktrace.c             |  30
-rw-r--r--  kernel/trace/bpf_trace.c            |  10
-rw-r--r--  kernel/trace/ftrace.c               |  12
-rw-r--r--  kernel/trace/ring_buffer.c          |   2
-rw-r--r--  kernel/trace/trace.c                |   3
-rw-r--r--  kernel/trace/trace.h                |   3
-rw-r--r--  kernel/trace/trace_boot.c           |  10
-rw-r--r--  kernel/trace/trace_entries.h        |  14
-rw-r--r--  kernel/trace/trace_events_trigger.c |  21
-rw-r--r--  kernel/trace/trace_export.c         |  16
-rw-r--r--  kernel/trace/trace_functions.c      |   2
-rw-r--r--  kernel/trace/trace_kprobe.c         |   6
-rw-r--r--  kernel/trace/trace_probe.c          |   4
-rw-r--r--  kernel/trace/trace_probe.h          |   2
-rw-r--r--  kernel/workqueue.c                  |  10
39 files changed, 377 insertions, 238 deletions
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 4d76f16524cc..ac53102e244a 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -1276,16 +1276,23 @@ static bool __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp,
static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen)
{
- if (unlikely(max_optlen > PAGE_SIZE) || max_optlen < 0)
+ if (unlikely(max_optlen < 0))
return -EINVAL;
+ if (unlikely(max_optlen > PAGE_SIZE)) {
+ /* We don't expose optvals that are greater than PAGE_SIZE
+ * to the BPF program.
+ */
+ max_optlen = PAGE_SIZE;
+ }
+
ctx->optval = kzalloc(max_optlen, GFP_USER);
if (!ctx->optval)
return -ENOMEM;
ctx->optval_end = ctx->optval + max_optlen;
- return 0;
+ return max_optlen;
}
static void sockopt_free_buf(struct bpf_sockopt_kern *ctx)
@@ -1319,13 +1326,13 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
*/
max_optlen = max_t(int, 16, *optlen);
- ret = sockopt_alloc_buf(&ctx, max_optlen);
- if (ret)
- return ret;
+ max_optlen = sockopt_alloc_buf(&ctx, max_optlen);
+ if (max_optlen < 0)
+ return max_optlen;
ctx.optlen = *optlen;
- if (copy_from_user(ctx.optval, optval, *optlen) != 0) {
+ if (copy_from_user(ctx.optval, optval, min(*optlen, max_optlen)) != 0) {
ret = -EFAULT;
goto out;
}
@@ -1353,8 +1360,14 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
/* export any potential modifications */
*level = ctx.level;
*optname = ctx.optname;
- *optlen = ctx.optlen;
- *kernel_optval = ctx.optval;
+
+ /* optlen == 0 from BPF indicates that we should
+ * use original userspace data.
+ */
+ if (ctx.optlen != 0) {
+ *optlen = ctx.optlen;
+ *kernel_optval = ctx.optval;
+ }
}
out:
@@ -1385,12 +1398,12 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
__cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_GETSOCKOPT))
return retval;
- ret = sockopt_alloc_buf(&ctx, max_optlen);
- if (ret)
- return ret;
-
ctx.optlen = max_optlen;
+ max_optlen = sockopt_alloc_buf(&ctx, max_optlen);
+ if (max_optlen < 0)
+ return max_optlen;
+
if (!retval) {
/* If kernel getsockopt finished successfully,
* copy whatever was returned to the user back
@@ -1404,10 +1417,8 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
goto out;
}
- if (ctx.optlen > max_optlen)
- ctx.optlen = max_optlen;
-
- if (copy_from_user(ctx.optval, optval, ctx.optlen) != 0) {
+ if (copy_from_user(ctx.optval, optval,
+ min(ctx.optlen, max_optlen)) != 0) {
ret = -EFAULT;
goto out;
}
@@ -1436,10 +1447,12 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
goto out;
}
- if (copy_to_user(optval, ctx.optval, ctx.optlen) ||
- put_user(ctx.optlen, optlen)) {
- ret = -EFAULT;
- goto out;
+ if (ctx.optlen != 0) {
+ if (copy_to_user(optval, ctx.optval, ctx.optlen) ||
+ put_user(ctx.optlen, optlen)) {
+ ret = -EFAULT;
+ goto out;
+ }
}
ret = ctx.retval;
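
A minimal sketch of the clamping pattern the cgroup.c hunks above converge on (illustrative only, not part of the patch): cap the kernel-side option buffer at PAGE_SIZE, bound the copy from userspace by both the caller's optlen and the allocation, and return the usable size so later copies stay bounded. The helper name is invented.

	#include <linux/kernel.h>   /* min() */
	#include <linux/mm.h>       /* PAGE_SIZE */
	#include <linux/slab.h>     /* kzalloc(), kfree() */
	#include <linux/uaccess.h>  /* copy_from_user() */

	static int alloc_and_copy_optval(void **kbuf, const char __user *optval,
					 int optlen, int max_optlen)
	{
		if (optlen < 0 || max_optlen < 0)
			return -EINVAL;
		if (max_optlen > PAGE_SIZE)
			max_optlen = PAGE_SIZE;	/* never expose more than one page */

		*kbuf = kzalloc(max_optlen, GFP_USER);
		if (!*kbuf)
			return -ENOMEM;

		/* bound the copy by both the caller's length and the allocation */
		if (copy_from_user(*kbuf, optval, min(optlen, max_optlen))) {
			kfree(*kbuf);
			*kbuf = NULL;
			return -EFAULT;
		}
		return max_optlen;	/* usable size, as sockopt_alloc_buf() now reports */
	}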
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 0cbb72cdaf63..5fdbc776a760 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -86,12 +86,13 @@ static DEFINE_PER_CPU(struct list_head, dev_flush_list);
static DEFINE_SPINLOCK(dev_map_lock);
static LIST_HEAD(dev_map_list);
-static struct hlist_head *dev_map_create_hash(unsigned int entries)
+static struct hlist_head *dev_map_create_hash(unsigned int entries,
+ int numa_node)
{
int i;
struct hlist_head *hash;
- hash = kmalloc_array(entries, sizeof(*hash), GFP_KERNEL);
+ hash = bpf_map_area_alloc(entries * sizeof(*hash), numa_node);
if (hash != NULL)
for (i = 0; i < entries; i++)
INIT_HLIST_HEAD(&hash[i]);
@@ -145,7 +146,8 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
return -EINVAL;
if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
- dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets);
+ dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets,
+ dtab->map.numa_node);
if (!dtab->dev_index_head)
goto free_charge;
@@ -232,7 +234,7 @@ static void dev_map_free(struct bpf_map *map)
}
}
- kfree(dtab->dev_index_head);
+ bpf_map_area_free(dtab->dev_index_head);
} else {
for (i = 0; i < dtab->map.max_entries; i++) {
struct bpf_dtab_netdev *dev;
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index ccc0f98abdd4..9e5934780f41 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -169,18 +169,18 @@ int __weak kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
{
int err;
- err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,
+ err = copy_from_kernel_nofault(bpt->saved_instr, (char *)bpt->bpt_addr,
BREAK_INSTR_SIZE);
if (err)
return err;
- err = probe_kernel_write((char *)bpt->bpt_addr,
+ err = copy_to_kernel_nofault((char *)bpt->bpt_addr,
arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE);
return err;
}
int __weak kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
{
- return probe_kernel_write((char *)bpt->bpt_addr,
+ return copy_to_kernel_nofault((char *)bpt->bpt_addr,
(char *)bpt->saved_instr, BREAK_INSTR_SIZE);
}
@@ -587,6 +587,7 @@ static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs,
arch_kgdb_ops.disable_hw_break(regs);
acquirelock:
+ rcu_read_lock();
/*
* Interrupts will be restored by the 'trap return' code, except when
* single stepping.
@@ -646,6 +647,7 @@ return_normal:
atomic_dec(&slaves_in_kgdb);
dbg_touch_watchdogs();
local_irq_restore(flags);
+ rcu_read_unlock();
return 0;
}
cpu_relax();
@@ -664,6 +666,7 @@ return_normal:
raw_spin_unlock(&dbg_master_lock);
dbg_touch_watchdogs();
local_irq_restore(flags);
+ rcu_read_unlock();
goto acquirelock;
}
@@ -787,6 +790,7 @@ kgdb_restore:
raw_spin_unlock(&dbg_master_lock);
dbg_touch_watchdogs();
local_irq_restore(flags);
+ rcu_read_unlock();
return kgdb_info[cpu].ret_state;
}
diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c
index 4b280fc7dd67..61774aec46b4 100644
--- a/kernel/debug/gdbstub.c
+++ b/kernel/debug/gdbstub.c
@@ -247,7 +247,7 @@ char *kgdb_mem2hex(char *mem, char *buf, int count)
*/
tmp = buf + count;
- err = probe_kernel_read(tmp, mem, count);
+ err = copy_from_kernel_nofault(tmp, mem, count);
if (err)
return NULL;
while (count > 0) {
@@ -283,7 +283,7 @@ int kgdb_hex2mem(char *buf, char *mem, int count)
*tmp_raw |= hex_to_bin(*tmp_hex--) << 4;
}
- return probe_kernel_write(mem, tmp_raw, count);
+ return copy_to_kernel_nofault(mem, tmp_raw, count);
}
/*
@@ -335,7 +335,7 @@ static int kgdb_ebin2mem(char *buf, char *mem, int count)
size++;
}
- return probe_kernel_write(mem, c, size);
+ return copy_to_kernel_nofault(mem, c, size);
}
#if DBG_MAX_REG_NUM > 0
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 924bc9298a42..683a799618ad 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -542,6 +542,44 @@ static int kdb_search_string(char *searched, char *searchfor)
return 0;
}
+static void kdb_msg_write(const char *msg, int msg_len)
+{
+ struct console *c;
+
+ if (msg_len == 0)
+ return;
+
+ if (dbg_io_ops) {
+ const char *cp = msg;
+ int len = msg_len;
+
+ while (len--) {
+ dbg_io_ops->write_char(*cp);
+ cp++;
+ }
+ }
+
+ for_each_console(c) {
+ if (!(c->flags & CON_ENABLED))
+ continue;
+ if (c == dbg_io_ops->cons)
+ continue;
+ /*
+ * Set oops_in_progress to encourage the console drivers to
+ * disregard their internal spin locks: in the current calling
+ * context the risk of deadlock is a bigger problem than risks
+ * due to re-entering the console driver. We operate directly on
+ * oops_in_progress rather than using bust_spinlocks() because
+ * the calls bust_spinlocks() makes on exit are not appropriate
+ * for this calling context.
+ */
+ ++oops_in_progress;
+ c->write(c, msg, msg_len);
+ --oops_in_progress;
+ touch_nmi_watchdog();
+ }
+}
+
int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap)
{
int diag;
@@ -553,7 +591,6 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap)
int this_cpu, old_cpu;
char *cp, *cp2, *cphold = NULL, replaced_byte = ' ';
char *moreprompt = "more> ";
- struct console *c;
unsigned long uninitialized_var(flags);
/* Serialize kdb_printf if multiple cpus try to write at once.
@@ -687,22 +724,11 @@ kdb_printit:
*/
retlen = strlen(kdb_buffer);
cp = (char *) printk_skip_headers(kdb_buffer);
- if (!dbg_kdb_mode && kgdb_connected) {
+ if (!dbg_kdb_mode && kgdb_connected)
gdbstub_msg_write(cp, retlen - (cp - kdb_buffer));
- } else {
- if (dbg_io_ops && !dbg_io_ops->is_console) {
- len = retlen - (cp - kdb_buffer);
- cp2 = cp;
- while (len--) {
- dbg_io_ops->write_char(*cp2);
- cp2++;
- }
- }
- for_each_console(c) {
- c->write(c, cp, retlen - (cp - kdb_buffer));
- touch_nmi_watchdog();
- }
- }
+ else
+ kdb_msg_write(cp, retlen - (cp - kdb_buffer));
+
if (logging) {
saved_loglevel = console_loglevel;
console_loglevel = CONSOLE_LOGLEVEL_SILENT;
@@ -751,19 +777,7 @@ kdb_printit:
moreprompt = "more> ";
kdb_input_flush();
-
- if (dbg_io_ops && !dbg_io_ops->is_console) {
- len = strlen(moreprompt);
- cp = moreprompt;
- while (len--) {
- dbg_io_ops->write_char(*cp);
- cp++;
- }
- }
- for_each_console(c) {
- c->write(c, moreprompt, strlen(moreprompt));
- touch_nmi_watchdog();
- }
+ kdb_msg_write(moreprompt, strlen(moreprompt));
if (logging)
printk("%s", moreprompt);
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index ec190569f690..5c7949061671 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -2326,7 +2326,8 @@ void kdb_ps1(const struct task_struct *p)
int cpu;
unsigned long tmp;
- if (!p || probe_kernel_read(&tmp, (char *)p, sizeof(unsigned long)))
+ if (!p ||
+ copy_from_kernel_nofault(&tmp, (char *)p, sizeof(unsigned long)))
return;
cpu = kdb_process_cpu(p);
diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c
index b8e6306e7e13..004c5b6c87f8 100644
--- a/kernel/debug/kdb/kdb_support.c
+++ b/kernel/debug/kdb/kdb_support.c
@@ -325,7 +325,7 @@ char *kdb_strdup(const char *str, gfp_t type)
*/
int kdb_getarea_size(void *res, unsigned long addr, size_t size)
{
- int ret = probe_kernel_read((char *)res, (char *)addr, size);
+ int ret = copy_from_kernel_nofault((char *)res, (char *)addr, size);
if (ret) {
if (!KDB_STATE(SUPPRESS)) {
kdb_printf("kdb_getarea: Bad address 0x%lx\n", addr);
@@ -350,7 +350,7 @@ int kdb_getarea_size(void *res, unsigned long addr, size_t size)
*/
int kdb_putarea_size(unsigned long addr, void *res, size_t size)
{
- int ret = probe_kernel_read((char *)addr, (char *)res, size);
+ int ret = copy_from_kernel_nofault((char *)addr, (char *)res, size);
if (ret) {
if (!KDB_STATE(SUPPRESS)) {
kdb_printf("kdb_putarea: Bad address 0x%lx\n", addr);
@@ -624,7 +624,8 @@ char kdb_task_state_char (const struct task_struct *p)
char state;
unsigned long tmp;
- if (!p || probe_kernel_read(&tmp, (char *)p, sizeof(unsigned long)))
+ if (!p ||
+ copy_from_kernel_nofault(&tmp, (char *)p, sizeof(unsigned long)))
return 'E';
cpu = kdb_process_cpu(p);
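
The debug/kdb hunks above are a mechanical rename from probe_kernel_read()/probe_kernel_write() to copy_from_kernel_nofault()/copy_to_kernel_nofault(). A small sketch of how the non-faulting accessor is typically used (not from the patch; the helper name is invented): the return value is checked instead of letting a bad pointer oops.

	#include <linux/types.h>
	#include <linux/uaccess.h>	/* copy_from_kernel_nofault() */

	/* Probe a possibly-stale kernel pointer without risking a fault. */
	static bool kernel_address_readable(const void *addr)
	{
		unsigned long tmp;

		/* returns 0 on success, a negative errno if the address cannot be read */
		return addr && copy_from_kernel_nofault(&tmp, addr, sizeof(tmp)) == 0;
	}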
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index d006668c0027..1da3f44f2565 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -71,20 +71,21 @@ config SWIOTLB
# in the pagetables
#
config DMA_NONCOHERENT_MMAP
+ default y if !MMU
bool
-config DMA_REMAP
- depends on MMU
+config DMA_COHERENT_POOL
select GENERIC_ALLOCATOR
- select DMA_NONCOHERENT_MMAP
bool
-config DMA_COHERENT_POOL
+config DMA_REMAP
bool
- select DMA_REMAP
+ depends on MMU
+ select DMA_NONCOHERENT_MMAP
config DMA_DIRECT_REMAP
bool
+ select DMA_REMAP
select DMA_COHERENT_POOL
config DMA_CMA
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 0a4881e59aa7..93f578a8e613 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -109,14 +109,15 @@ static inline bool dma_should_free_from_pool(struct device *dev,
return false;
}
-struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
+static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
gfp_t gfp, unsigned long attrs)
{
- size_t alloc_size = PAGE_ALIGN(size);
int node = dev_to_node(dev);
struct page *page = NULL;
u64 phys_limit;
+ WARN_ON_ONCE(!PAGE_ALIGNED(size));
+
if (attrs & DMA_ATTR_NO_WARN)
gfp |= __GFP_NOWARN;
@@ -124,14 +125,14 @@ struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
gfp &= ~__GFP_ZERO;
gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
&phys_limit);
- page = dma_alloc_contiguous(dev, alloc_size, gfp);
+ page = dma_alloc_contiguous(dev, size, gfp);
if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
- dma_free_contiguous(dev, page, alloc_size);
+ dma_free_contiguous(dev, page, size);
page = NULL;
}
again:
if (!page)
- page = alloc_pages_node(node, gfp, get_order(alloc_size));
+ page = alloc_pages_node(node, gfp, get_order(size));
if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
dma_free_contiguous(dev, page, size);
page = NULL;
@@ -157,9 +158,12 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
{
struct page *page;
void *ret;
+ int err;
+
+ size = PAGE_ALIGN(size);
if (dma_should_alloc_from_pool(dev, gfp, attrs)) {
- ret = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &page, gfp);
+ ret = dma_alloc_from_pool(dev, size, &page, gfp);
if (!ret)
return NULL;
goto done;
@@ -183,14 +187,20 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
dma_alloc_need_uncached(dev, attrs)) ||
(IS_ENABLED(CONFIG_DMA_REMAP) && PageHighMem(page))) {
/* remove any dirty cache lines on the kernel alias */
- arch_dma_prep_coherent(page, PAGE_ALIGN(size));
+ arch_dma_prep_coherent(page, size);
/* create a coherent mapping */
- ret = dma_common_contiguous_remap(page, PAGE_ALIGN(size),
+ ret = dma_common_contiguous_remap(page, size,
dma_pgprot(dev, PAGE_KERNEL, attrs),
__builtin_return_address(0));
if (!ret)
goto out_free_pages;
+ if (force_dma_unencrypted(dev)) {
+ err = set_memory_decrypted((unsigned long)ret,
+ 1 << get_order(size));
+ if (err)
+ goto out_free_pages;
+ }
memset(ret, 0, size);
goto done;
}
@@ -207,8 +217,12 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
}
ret = page_address(page);
- if (force_dma_unencrypted(dev))
- set_memory_decrypted((unsigned long)ret, 1 << get_order(size));
+ if (force_dma_unencrypted(dev)) {
+ err = set_memory_decrypted((unsigned long)ret,
+ 1 << get_order(size));
+ if (err)
+ goto out_free_pages;
+ }
memset(ret, 0, size);
@@ -217,7 +231,7 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
arch_dma_prep_coherent(page, size);
ret = arch_dma_set_uncached(ret, size);
if (IS_ERR(ret))
- goto out_free_pages;
+ goto out_encrypt_pages;
}
done:
if (force_dma_unencrypted(dev))
@@ -225,6 +239,15 @@ done:
else
*dma_handle = phys_to_dma(dev, page_to_phys(page));
return ret;
+
+out_encrypt_pages:
+ if (force_dma_unencrypted(dev)) {
+ err = set_memory_encrypted((unsigned long)page_address(page),
+ 1 << get_order(size));
+ /* If memory cannot be re-encrypted, it must be leaked */
+ if (err)
+ return NULL;
+ }
out_free_pages:
dma_free_contiguous(dev, page, size);
return NULL;
@@ -459,7 +482,6 @@ int dma_direct_get_sgtable(struct device *dev, struct sg_table *sgt,
return ret;
}
-#ifdef CONFIG_MMU
bool dma_direct_can_mmap(struct device *dev)
{
return dev_is_dma_coherent(dev) ||
@@ -485,19 +507,6 @@ int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma,
return remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff,
user_count << PAGE_SHIFT, vma->vm_page_prot);
}
-#else /* CONFIG_MMU */
-bool dma_direct_can_mmap(struct device *dev)
-{
- return false;
-}
-
-int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma,
- void *cpu_addr, dma_addr_t dma_addr, size_t size,
- unsigned long attrs)
-{
- return -ENXIO;
-}
-#endif /* CONFIG_MMU */
int dma_direct_supported(struct device *dev, u64 mask)
{
diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c
index 35bb51c31fff..8cfa01243ed2 100644
--- a/kernel/dma/pool.c
+++ b/kernel/dma/pool.c
@@ -175,10 +175,9 @@ static int __init dma_atomic_pool_init(void)
* sizes to 128KB per 1GB of memory, min 128KB, max MAX_ORDER-1.
*/
if (!atomic_pool_size) {
- atomic_pool_size = max(totalram_pages() >> PAGE_SHIFT, 1UL) *
- SZ_128K;
- atomic_pool_size = min_t(size_t, atomic_pool_size,
- 1 << (PAGE_SHIFT + MAX_ORDER-1));
+ unsigned long pages = totalram_pages() / (SZ_1G / SZ_128K);
+ pages = min_t(unsigned long, pages, MAX_ORDER_NR_PAGES);
+ atomic_pool_size = max_t(size_t, pages << PAGE_SHIFT, SZ_128K);
}
INIT_WORK(&atomic_pool_work, atomic_pool_work_fn);
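
The pool.c hunk rewrites the default atomic pool sizing; a worked example of the new arithmetic, assuming 4 KiB pages and 4 GiB of RAM (the constants are the kernel's, the numbers are only an illustration):

	/*
	 * pages            = totalram_pages() / (SZ_1G / SZ_128K)
	 *                  = 1048576 / 8192                     = 128 pages
	 * pages            = min(pages, MAX_ORDER_NR_PAGES)     = 128 (<= 1024 on x86-64)
	 * atomic_pool_size = max(pages << PAGE_SHIFT, SZ_128K)
	 *                  = max(512 KiB, 128 KiB)               = 512 KiB
	 *
	 * i.e. roughly 128 KiB of pool per 1 GiB of memory, never below 128 KiB
	 * and never above a single MAX_ORDER-1 allocation.
	 */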
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
index e739a6eea6e7..78b23f089cf1 100644
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -24,7 +24,8 @@ void *dma_common_pages_remap(struct page **pages, size_t size,
{
void *vaddr;
- vaddr = vmap(pages, size >> PAGE_SHIFT, VM_DMA_COHERENT, prot);
+ vaddr = vmap(pages, PAGE_ALIGN(size) >> PAGE_SHIFT,
+ VM_DMA_COHERENT, prot);
if (vaddr)
find_vm_area(vaddr)->pages = pages;
return vaddr;
@@ -37,7 +38,7 @@ void *dma_common_pages_remap(struct page **pages, size_t size,
void *dma_common_contiguous_remap(struct page *page, size_t size,
pgprot_t prot, const void *caller)
{
- int count = size >> PAGE_SHIFT;
+ int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
struct page **pages;
void *vaddr;
int i;
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index bb05fd52de85..09cc78df53c6 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -181,34 +181,19 @@ void kimage_file_post_load_cleanup(struct kimage *image)
static int
kimage_validate_signature(struct kimage *image)
{
- const char *reason;
int ret;
ret = arch_kexec_kernel_verify_sig(image, image->kernel_buf,
image->kernel_buf_len);
- switch (ret) {
- case 0:
- break;
+ if (ret) {
- /* Certain verification errors are non-fatal if we're not
- * checking errors, provided we aren't mandating that there
- * must be a valid signature.
- */
- case -ENODATA:
- reason = "kexec of unsigned image";
- goto decide;
- case -ENOPKG:
- reason = "kexec of image with unsupported crypto";
- goto decide;
- case -ENOKEY:
- reason = "kexec of image with unavailable key";
- decide:
if (IS_ENABLED(CONFIG_KEXEC_SIG_FORCE)) {
- pr_notice("%s rejected\n", reason);
+ pr_notice("Enforced kernel signature verification failed (%d).\n", ret);
return ret;
}
- /* If IMA is guaranteed to appraise a signature on the kexec
+ /*
+ * If IMA is guaranteed to appraise a signature on the kexec
* image, permit it even if the kernel is otherwise locked
* down.
*/
@@ -216,17 +201,10 @@ kimage_validate_signature(struct kimage *image)
security_locked_down(LOCKDOWN_KEXEC))
return -EPERM;
- return 0;
-
- /* All other errors are fatal, including nomem, unparseable
- * signatures and signature check failures - even if signatures
- * aren't required.
- */
- default:
- pr_notice("kernel signature verification failed (%d).\n", ret);
+ pr_debug("kernel signature verification failed (%d).\n", ret);
}
- return ret;
+ return 0;
}
#endif
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 2b58740ca0f3..0924397d9d59 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -47,6 +47,11 @@
static int kprobes_initialized;
+/* kprobe_table can be accessed by
+ * - Normal hlist traversal and RCU add/del under kprobe_mutex is held.
+ * Or
+ * - RCU hlist traversal under disabling preempt (breakpoint handlers)
+ */
static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
@@ -361,7 +366,8 @@ struct kprobe *get_kprobe(void *addr)
struct kprobe *p;
head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)];
- hlist_for_each_entry_rcu(p, head, hlist) {
+ hlist_for_each_entry_rcu(p, head, hlist,
+ lockdep_is_held(&kprobe_mutex)) {
if (p->addr == addr)
return p;
}
@@ -621,11 +627,12 @@ static void kprobe_optimizer(struct work_struct *work)
mutex_unlock(&module_mutex);
mutex_unlock(&text_mutex);
cpus_read_unlock();
- mutex_unlock(&kprobe_mutex);
/* Step 5: Kick optimizer again if needed */
if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list))
kick_kprobe_optimizer();
+
+ mutex_unlock(&kprobe_mutex);
}
/* Wait for completing optimization and unoptimization */
@@ -703,8 +710,6 @@ static void force_unoptimize_kprobe(struct optimized_kprobe *op)
lockdep_assert_cpus_held();
arch_unoptimize_kprobe(op);
op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
- if (kprobe_disabled(&op->kp))
- arch_disarm_kprobe(&op->kp);
}
/* Unoptimize a kprobe if p is optimized */
@@ -884,7 +889,7 @@ static void optimize_all_kprobes(void)
kprobes_allow_optimization = true;
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
- hlist_for_each_entry_rcu(p, head, hlist)
+ hlist_for_each_entry(p, head, hlist)
if (!kprobe_disabled(p))
optimize_kprobe(p);
}
@@ -911,7 +916,7 @@ static void unoptimize_all_kprobes(void)
kprobes_allow_optimization = false;
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
- hlist_for_each_entry_rcu(p, head, hlist) {
+ hlist_for_each_entry(p, head, hlist) {
if (!kprobe_disabled(p))
unoptimize_kprobe(p, false);
}
@@ -1271,6 +1276,26 @@ __releases(hlist_lock)
}
NOKPROBE_SYMBOL(kretprobe_table_unlock);
+struct kprobe kprobe_busy = {
+ .addr = (void *) get_kprobe,
+};
+
+void kprobe_busy_begin(void)
+{
+ struct kprobe_ctlblk *kcb;
+
+ preempt_disable();
+ __this_cpu_write(current_kprobe, &kprobe_busy);
+ kcb = get_kprobe_ctlblk();
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+}
+
+void kprobe_busy_end(void)
+{
+ __this_cpu_write(current_kprobe, NULL);
+ preempt_enable();
+}
+
/*
* This function is called from finish_task_switch when task tk becomes dead,
* so that we can recycle any function-return probe instances associated
@@ -1288,6 +1313,8 @@ void kprobe_flush_task(struct task_struct *tk)
/* Early boot. kretprobe_table_locks not yet initialized. */
return;
+ kprobe_busy_begin();
+
INIT_HLIST_HEAD(&empty_rp);
hash = hash_ptr(tk, KPROBE_HASH_BITS);
head = &kretprobe_inst_table[hash];
@@ -1301,6 +1328,8 @@ void kprobe_flush_task(struct task_struct *tk)
hlist_del(&ri->hlist);
kfree(ri);
}
+
+ kprobe_busy_end();
}
NOKPROBE_SYMBOL(kprobe_flush_task);
@@ -1534,12 +1563,14 @@ static struct kprobe *__get_valid_kprobe(struct kprobe *p)
{
struct kprobe *ap, *list_p;
+ lockdep_assert_held(&kprobe_mutex);
+
ap = get_kprobe(p->addr);
if (unlikely(!ap))
return NULL;
if (p != ap) {
- list_for_each_entry_rcu(list_p, &ap->list, list)
+ list_for_each_entry(list_p, &ap->list, list)
if (list_p == p)
/* kprobe p is a valid probe */
goto valid;
@@ -1704,7 +1735,9 @@ static int aggr_kprobe_disabled(struct kprobe *ap)
{
struct kprobe *kp;
- list_for_each_entry_rcu(kp, &ap->list, list)
+ lockdep_assert_held(&kprobe_mutex);
+
+ list_for_each_entry(kp, &ap->list, list)
if (!kprobe_disabled(kp))
/*
* There is an active probe on the list.
@@ -1783,7 +1816,7 @@ static int __unregister_kprobe_top(struct kprobe *p)
else {
/* If disabling probe has special handlers, update aggrprobe */
if (p->post_handler && !kprobe_gone(p)) {
- list_for_each_entry_rcu(list_p, &ap->list, list) {
+ list_for_each_entry(list_p, &ap->list, list) {
if ((list_p != p) && (list_p->post_handler))
goto noclean;
}
@@ -2097,13 +2130,15 @@ static void kill_kprobe(struct kprobe *p)
{
struct kprobe *kp;
+ lockdep_assert_held(&kprobe_mutex);
+
p->flags |= KPROBE_FLAG_GONE;
if (kprobe_aggrprobe(p)) {
/*
* If this is an aggr_kprobe, we have to list all the
* chained probes and mark them GONE.
*/
- list_for_each_entry_rcu(kp, &p->list, list)
+ list_for_each_entry(kp, &p->list, list)
kp->flags |= KPROBE_FLAG_GONE;
p->post_handler = NULL;
kill_optimized_kprobe(p);
@@ -2369,7 +2404,7 @@ static int kprobes_module_callback(struct notifier_block *nb,
mutex_lock(&kprobe_mutex);
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
- hlist_for_each_entry_rcu(p, head, hlist)
+ hlist_for_each_entry(p, head, hlist)
if (within_module_init((unsigned long)p->addr, mod) ||
(checkcore &&
within_module_core((unsigned long)p->addr, mod))) {
@@ -2607,7 +2642,7 @@ static int arm_all_kprobes(void)
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
/* Arm all kprobes on a best-effort basis */
- hlist_for_each_entry_rcu(p, head, hlist) {
+ hlist_for_each_entry(p, head, hlist) {
if (!kprobe_disabled(p)) {
err = arm_kprobe(p);
if (err) {
@@ -2650,7 +2685,7 @@ static int disarm_all_kprobes(void)
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
/* Disarm all kprobes on a best-effort basis */
- hlist_for_each_entry_rcu(p, head, hlist) {
+ hlist_for_each_entry(p, head, hlist) {
if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) {
err = disarm_kprobe(p, false);
if (err) {
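
The kprobes.c hunks above distinguish the two ways kprobe_table is walked, per the new comment at the top of the file: plain hlist traversal while kprobe_mutex is held, and RCU traversal from breakpoint context. A sketch of the lookup side, assuming hlist_for_each_entry_rcu() with a lockdep expression as get_kprobe() now uses (illustrative only):

	static struct kprobe *lookup_probe(struct hlist_head *head, void *addr)
	{
		struct kprobe *p;

		/*
		 * Safe either under kprobe_mutex or inside an RCU read-side /
		 * preempt-disabled section; the lockdep expression covers the
		 * mutex-held case so lockdep does not flag the traversal.
		 */
		hlist_for_each_entry_rcu(p, head, hlist,
					 lockdep_is_held(&kprobe_mutex)) {
			if (p->addr == addr)
				return p;
		}
		return NULL;
	}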
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 8e3d2d7fdf5e..132f84a5fde3 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -201,7 +201,7 @@ void *kthread_probe_data(struct task_struct *task)
struct kthread *kthread = to_kthread(task);
void *data = NULL;
- probe_kernel_read(&data, &kthread->data, sizeof(data));
+ copy_from_kernel_nofault(&data, &kthread->data, sizeof(data));
return data;
}
diff --git a/kernel/module.c b/kernel/module.c
index e8a198588f26..0c6573b98c36 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2783,7 +2783,9 @@ static void dynamic_debug_remove(struct module *mod, struct _ddebug *debug)
void * __weak module_alloc(unsigned long size)
{
- return vmalloc_exec(size);
+ return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
+ GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS,
+ NUMA_NO_NODE, __func__);
}
bool __weak module_init_section(const char *name)
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index b03df67621d0..cd356630a311 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -531,7 +531,7 @@ SYSCALL_DEFINE2(setns, int, fd, int, flags)
} else if (!IS_ERR(pidfd_pid(file))) {
err = check_setns_flags(flags);
} else {
- err = -EBADF;
+ err = -EINVAL;
}
if (err)
goto out;
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 8c14835be46c..b71eaf5f5a86 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -974,16 +974,6 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
user->idx = log_next_idx;
user->seq = log_next_seq;
break;
- case SEEK_CUR:
- /*
- * It isn't supported due to the record nature of this
- * interface: _SET _DATA and _END point to very specific
- * record positions, while _CUR would be more useful in case
- * of a byte-based log. Because of that, return the default
- * errno value for invalid seek operation.
- */
- ret = -ESPIPE;
- break;
default:
ret = -EINVAL;
}
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index c716eadc7617..6c6569e0586c 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -250,7 +250,7 @@ static noinstr void rcu_dynticks_eqs_enter(void)
* next idle sojourn.
*/
rcu_dynticks_task_trace_enter(); // Before ->dynticks update!
- seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
+ seq = arch_atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
// RCU is no longer watching. Better be in extended quiescent state!
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
(seq & RCU_DYNTICK_CTRL_CTR));
@@ -274,13 +274,13 @@ static noinstr void rcu_dynticks_eqs_exit(void)
* and we also must force ordering with the next RCU read-side
* critical section.
*/
- seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
+ seq = arch_atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
// RCU is now watching. Better not be in an extended quiescent state!
rcu_dynticks_task_trace_exit(); // After ->dynticks update!
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
!(seq & RCU_DYNTICK_CTRL_CTR));
if (seq & RCU_DYNTICK_CTRL_MASK) {
- atomic_andnot(RCU_DYNTICK_CTRL_MASK, &rdp->dynticks);
+ arch_atomic_andnot(RCU_DYNTICK_CTRL_MASK, &rdp->dynticks);
smp_mb__after_atomic(); /* _exit after clearing mask. */
}
}
@@ -313,7 +313,7 @@ static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void)
{
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
- return !(atomic_read(&rdp->dynticks) & RCU_DYNTICK_CTRL_CTR);
+ return !(arch_atomic_read(&rdp->dynticks) & RCU_DYNTICK_CTRL_CTR);
}
/*
@@ -633,6 +633,10 @@ static noinstr void rcu_eqs_enter(bool user)
do_nocb_deferred_wakeup(rdp);
rcu_prepare_for_idle();
rcu_preempt_deferred_qs(current);
+
+ // instrumentation for the noinstr rcu_dynticks_eqs_enter()
+ instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks));
+
instrumentation_end();
WRITE_ONCE(rdp->dynticks_nesting, 0); /* Avoid irq-access tearing. */
// RCU is watching here ...
@@ -692,6 +696,7 @@ noinstr void rcu_nmi_exit(void)
{
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
+ instrumentation_begin();
/*
* Check for ->dynticks_nmi_nesting underflow and bad ->dynticks.
* (We are exiting an NMI handler, so RCU better be paying attention
@@ -705,7 +710,6 @@ noinstr void rcu_nmi_exit(void)
* leave it in non-RCU-idle state.
*/
if (rdp->dynticks_nmi_nesting != 1) {
- instrumentation_begin();
trace_rcu_dyntick(TPS("--="), rdp->dynticks_nmi_nesting, rdp->dynticks_nmi_nesting - 2,
atomic_read(&rdp->dynticks));
WRITE_ONCE(rdp->dynticks_nmi_nesting, /* No store tearing. */
@@ -714,13 +718,15 @@ noinstr void rcu_nmi_exit(void)
return;
}
- instrumentation_begin();
/* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
trace_rcu_dyntick(TPS("Startirq"), rdp->dynticks_nmi_nesting, 0, atomic_read(&rdp->dynticks));
WRITE_ONCE(rdp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */
if (!in_nmi())
rcu_prepare_for_idle();
+
+ // instrumentation for the noinstr rcu_dynticks_eqs_enter()
+ instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks));
instrumentation_end();
// RCU is watching here ...
@@ -838,6 +844,10 @@ static void noinstr rcu_eqs_exit(bool user)
rcu_dynticks_eqs_exit();
// ... but is watching here.
instrumentation_begin();
+
+ // instrumentation for the noinstr rcu_dynticks_eqs_exit()
+ instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks));
+
rcu_cleanup_after_idle();
trace_rcu_dyntick(TPS("End"), rdp->dynticks_nesting, 1, atomic_read(&rdp->dynticks));
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
@@ -983,13 +993,21 @@ noinstr void rcu_nmi_enter(void)
if (!in_nmi())
rcu_cleanup_after_idle();
+ instrumentation_begin();
+ // instrumentation for the noinstr rcu_dynticks_curr_cpu_in_eqs()
+ instrument_atomic_read(&rdp->dynticks, sizeof(rdp->dynticks));
+ // instrumentation for the noinstr rcu_dynticks_eqs_exit()
+ instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks));
+
incby = 1;
} else if (!in_nmi()) {
instrumentation_begin();
rcu_irq_enter_check_tick();
instrumentation_end();
+ } else {
+ instrumentation_begin();
}
- instrumentation_begin();
+
trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="),
rdp->dynticks_nmi_nesting,
rdp->dynticks_nmi_nesting + incby, atomic_read(&rdp->dynticks));
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8f360326861e..ca5db40392d4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1637,7 +1637,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
goto out;
}
- if (cpumask_equal(p->cpus_ptr, new_mask))
+ if (cpumask_equal(&p->cpus_mask, new_mask))
goto out;
/*
@@ -2293,8 +2293,15 @@ void sched_ttwu_pending(void *arg)
rq_lock_irqsave(rq, &rf);
update_rq_clock(rq);
- llist_for_each_entry_safe(p, t, llist, wake_entry)
+ llist_for_each_entry_safe(p, t, llist, wake_entry.llist) {
+ if (WARN_ON_ONCE(p->on_cpu))
+ smp_cond_load_acquire(&p->on_cpu, !VAL);
+
+ if (WARN_ON_ONCE(task_cpu(p) != cpu_of(rq)))
+ set_task_cpu(p, cpu_of(rq));
+
ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0, &rf);
+ }
rq_unlock_irqrestore(rq, &rf);
}
@@ -2322,7 +2329,7 @@ static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags
p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED);
WRITE_ONCE(rq->ttwu_pending, 1);
- __smp_call_single_queue(cpu, &p->wake_entry);
+ __smp_call_single_queue(cpu, &p->wake_entry.llist);
}
void wake_up_if_idle(int cpu)
@@ -2369,7 +2376,7 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags)
* the soon-to-be-idle CPU as the current CPU is likely busy.
* nr_running is checked to avoid unnecessary task stacking.
*/
- if ((wake_flags & WF_ON_RQ) && cpu_rq(cpu)->nr_running <= 1)
+ if ((wake_flags & WF_ON_CPU) && cpu_rq(cpu)->nr_running <= 1)
return true;
return false;
@@ -2378,6 +2385,9 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags)
static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags)
{
if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(cpu, wake_flags)) {
+ if (WARN_ON_ONCE(cpu == smp_processor_id()))
+ return false;
+
sched_clock_cpu(cpu); /* Sync clocks across CPUs */
__ttwu_queue_wakelist(p, cpu, wake_flags);
return true;
@@ -2528,7 +2538,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
goto out;
success = 1;
- cpu = task_cpu(p);
trace_sched_waking(p);
p->state = TASK_RUNNING;
trace_sched_wakeup(p);
@@ -2550,7 +2559,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
/* We're going to change ->state: */
success = 1;
- cpu = task_cpu(p);
/*
* Ensure we load p->on_rq _after_ p->state, otherwise it would
@@ -2614,8 +2622,21 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
* which potentially sends an IPI instead of spinning on p->on_cpu to
* let the waker make forward progress. This is safe because IRQs are
* disabled and the IPI will deliver after on_cpu is cleared.
+ *
+ * Ensure we load task_cpu(p) after p->on_cpu:
+ *
+ * set_task_cpu(p, cpu);
+ * STORE p->cpu = @cpu
+ * __schedule() (switch to task 'p')
+ * LOCK rq->lock
+ * smp_mb__after_spin_lock() smp_cond_load_acquire(&p->on_cpu)
+ * STORE p->on_cpu = 1 LOAD p->cpu
+ *
+ * to ensure we observe the correct CPU on which the task is currently
+ * scheduling.
*/
- if (READ_ONCE(p->on_cpu) && ttwu_queue_wakelist(p, cpu, wake_flags | WF_ON_RQ))
+ if (smp_load_acquire(&p->on_cpu) &&
+ ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_CPU))
goto unlock;
/*
@@ -2635,6 +2656,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
psi_ttwu_dequeue(p);
set_task_cpu(p, cpu);
}
+#else
+ cpu = task_cpu(p);
#endif /* CONFIG_SMP */
ttwu_queue(p, cpu, wake_flags);
@@ -2642,7 +2665,7 @@ unlock:
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
out:
if (success)
- ttwu_stat(p, cpu, wake_flags);
+ ttwu_stat(p, task_cpu(p), wake_flags);
preempt_enable();
return success;
@@ -2763,7 +2786,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
#endif
init_numa_balancing(clone_flags, p);
#ifdef CONFIG_SMP
- p->wake_entry_type = CSD_TYPE_TTWU;
+ p->wake_entry.u_flags = CSD_TYPE_TTWU;
#endif
}
@@ -4533,7 +4556,8 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
*/
if (dl_prio(prio)) {
if (!dl_prio(p->normal_prio) ||
- (pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) {
+ (pi_task && dl_prio(pi_task->prio) &&
+ dl_entity_preempt(&pi_task->dl, &p->dl))) {
p->dl.dl_boosted = 1;
queue_flag |= ENQUEUE_REPLENISH;
} else
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 504d2f51b0d6..f63f337c7147 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -2692,6 +2692,7 @@ void __dl_clear_params(struct task_struct *p)
dl_se->dl_bw = 0;
dl_se->dl_density = 0;
+ dl_se->dl_boosted = 0;
dl_se->dl_throttled = 0;
dl_se->dl_yielded = 0;
dl_se->dl_non_contending = 0;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index cbcb2f71599b..658aa7a2ae6f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -806,7 +806,7 @@ void post_init_entity_util_avg(struct task_struct *p)
}
}
- sa->runnable_avg = cpu_scale;
+ sa->runnable_avg = sa->util_avg;
if (p->sched_class != &fair_sched_class) {
/*
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 05deb81bb3e3..1ae95b9150d3 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -96,6 +96,15 @@ void __cpuidle default_idle_call(void)
}
}
+static int call_cpuidle_s2idle(struct cpuidle_driver *drv,
+ struct cpuidle_device *dev)
+{
+ if (current_clr_polling_and_test())
+ return -EBUSY;
+
+ return cpuidle_enter_s2idle(drv, dev);
+}
+
static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
int next_state)
{
@@ -171,11 +180,9 @@ static void cpuidle_idle_call(void)
if (idle_should_enter_s2idle()) {
rcu_idle_enter();
- entered_state = cpuidle_enter_s2idle(drv, dev);
- if (entered_state > 0) {
- local_irq_enable();
+ entered_state = call_cpuidle_s2idle(drv, dev);
+ if (entered_state > 0)
goto exit_idle;
- }
rcu_idle_exit();
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 1d4e94c1e5fe..877fb08eb1b0 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1682,7 +1682,7 @@ static inline int task_on_rq_migrating(struct task_struct *p)
#define WF_SYNC 0x01 /* Waker goes to sleep after wakeup */
#define WF_FORK 0x02 /* Child wakeup after fork */
#define WF_MIGRATED 0x04 /* Internal use, task got migrated */
-#define WF_ON_RQ 0x08 /* Wakee is on_rq */
+#define WF_ON_CPU 0x08 /* Wakee is on_cpu */
/*
* To aid in avoiding the subversion of "niceness" due to uneven distribution
diff --git a/kernel/smp.c b/kernel/smp.c
index 472c2b274c65..aa17eedff5be 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -669,24 +669,6 @@ void __init smp_init(void)
{
int num_nodes, num_cpus;
- /*
- * Ensure struct irq_work layout matches so that
- * flush_smp_call_function_queue() can do horrible things.
- */
- BUILD_BUG_ON(offsetof(struct irq_work, llnode) !=
- offsetof(struct __call_single_data, llist));
- BUILD_BUG_ON(offsetof(struct irq_work, func) !=
- offsetof(struct __call_single_data, func));
- BUILD_BUG_ON(offsetof(struct irq_work, flags) !=
- offsetof(struct __call_single_data, flags));
-
- /*
- * Assert the CSD_TYPE_TTWU layout is similar enough
- * for task_struct to be on the @call_single_queue.
- */
- BUILD_BUG_ON(offsetof(struct task_struct, wake_entry_type) - offsetof(struct task_struct, wake_entry) !=
- offsetof(struct __call_single_data, flags) - offsetof(struct __call_single_data, llist));
-
idle_threads_init();
cpuhp_threads_init();
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 5773f0ba7e76..5ef0484513ec 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -3,6 +3,9 @@
* Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
*
*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/blktrace_api.h>
@@ -344,7 +347,8 @@ static int __blk_trace_remove(struct request_queue *q)
{
struct blk_trace *bt;
- bt = xchg(&q->blk_trace, NULL);
+ bt = rcu_replace_pointer(q->blk_trace, NULL,
+ lockdep_is_held(&q->blk_trace_mutex));
if (!bt)
return -EINVAL;
@@ -494,6 +498,17 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
*/
strreplace(buts->name, '/', '_');
+ /*
+ * bdev can be NULL, as with scsi-generic, this is a helpful as
+ * we can be.
+ */
+ if (rcu_dereference_protected(q->blk_trace,
+ lockdep_is_held(&q->blk_trace_mutex))) {
+ pr_warn("Concurrent blktraces are not allowed on %s\n",
+ buts->name);
+ return -EBUSY;
+ }
+
bt = kzalloc(sizeof(*bt), GFP_KERNEL);
if (!bt)
return -ENOMEM;
@@ -543,10 +558,7 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
bt->pid = buts->pid;
bt->trace_state = Blktrace_setup;
- ret = -EBUSY;
- if (cmpxchg(&q->blk_trace, NULL, bt))
- goto err;
-
+ rcu_assign_pointer(q->blk_trace, bt);
get_probe_ref();
ret = 0;
@@ -1629,7 +1641,8 @@ static int blk_trace_remove_queue(struct request_queue *q)
{
struct blk_trace *bt;
- bt = xchg(&q->blk_trace, NULL);
+ bt = rcu_replace_pointer(q->blk_trace, NULL,
+ lockdep_is_held(&q->blk_trace_mutex));
if (bt == NULL)
return -EINVAL;
@@ -1661,10 +1674,7 @@ static int blk_trace_setup_queue(struct request_queue *q,
blk_trace_setup_lba(bt, bdev);
- ret = -EBUSY;
- if (cmpxchg(&q->blk_trace, NULL, bt))
- goto free_bt;
-
+ rcu_assign_pointer(q->blk_trace, bt);
get_probe_ref();
return 0;
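
The blktrace.c hunks replace the lock-free xchg()/cmpxchg() handling of q->blk_trace with plain RCU pointer updates done under q->blk_trace_mutex. A generic sketch of that pattern with made-up names ("obj", "slot", "lock"), assuming the standard rcu_assign_pointer()/rcu_replace_pointer() helpers:

	#include <linux/errno.h>
	#include <linux/mutex.h>
	#include <linux/rcupdate.h>

	struct obj;	/* opaque payload */

	/* Install a new object: refuse if one is already published (cf. -EBUSY above). */
	static int install_obj(struct obj __rcu **slot, struct obj *newobj,
			       struct mutex *lock)
	{
		lockdep_assert_held(lock);
		if (rcu_dereference_protected(*slot, lockdep_is_held(lock)))
			return -EBUSY;
		rcu_assign_pointer(*slot, newobj);
		return 0;
	}

	/* Detach the current object; the caller frees it after a grace period. */
	static struct obj *detach_obj(struct obj __rcu **slot, struct mutex *lock)
	{
		lockdep_assert_held(lock);
		return rcu_replace_pointer(*slot, NULL, lockdep_is_held(lock));
	}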
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index e729c9e587a0..7bc3d6175868 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -141,7 +141,7 @@ bpf_probe_read_user_common(void *dst, u32 size, const void __user *unsafe_ptr)
{
int ret;
- ret = probe_user_read(dst, unsafe_ptr, size);
+ ret = copy_from_user_nofault(dst, unsafe_ptr, size);
if (unlikely(ret < 0))
memset(dst, 0, size);
return ret;
@@ -196,7 +196,7 @@ bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
if (unlikely(ret < 0))
goto fail;
- ret = probe_kernel_read(dst, unsafe_ptr, size);
+ ret = copy_from_kernel_nofault(dst, unsafe_ptr, size);
if (unlikely(ret < 0))
goto fail;
return ret;
@@ -241,7 +241,7 @@ bpf_probe_read_kernel_str_common(void *dst, u32 size, const void *unsafe_ptr)
if (unlikely(ret < 0))
goto fail;
- return 0;
+ return ret;
fail:
memset(dst, 0, size);
return ret;
@@ -326,7 +326,7 @@ BPF_CALL_3(bpf_probe_write_user, void __user *, unsafe_ptr, const void *, src,
if (unlikely(!nmi_uaccess_okay()))
return -EPERM;
- return probe_user_write(unsafe_ptr, src, size);
+ return copy_to_user_nofault(unsafe_ptr, src, size);
}
static const struct bpf_func_proto bpf_probe_write_user_proto = {
@@ -661,7 +661,7 @@ BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
copy_size = (fmt[i + 2] == '4') ? 4 : 16;
- err = probe_kernel_read(bufs->buf[memcpy_cnt],
+ err = copy_from_kernel_nofault(bufs->buf[memcpy_cnt],
(void *) (long) args[fmt_cnt],
copy_size);
if (err < 0)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index d6bba734ab72..72064541bef2 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2260,7 +2260,7 @@ ftrace_find_tramp_ops_next(struct dyn_ftrace *rec,
if (hash_contains_ip(ip, op->func_hash))
return op;
- }
+ }
return NULL;
}
@@ -3643,7 +3643,7 @@ static int t_show(struct seq_file *m, void *v)
if (direct)
seq_printf(m, "\n\tdirect-->%pS", (void *)direct);
}
- }
+ }
seq_putc(m, '\n');
@@ -7248,6 +7248,10 @@ static int pid_open(struct inode *inode, struct file *file, int type)
case TRACE_NO_PIDS:
seq_ops = &ftrace_no_pid_sops;
break;
+ default:
+ trace_array_put(tr);
+ WARN_ON_ONCE(1);
+ return -EINVAL;
}
ret = seq_open(file, seq_ops);
@@ -7326,6 +7330,10 @@ pid_write(struct file *filp, const char __user *ubuf,
other_pids = rcu_dereference_protected(tr->function_pids,
lockdep_is_held(&ftrace_lock));
break;
+ default:
+ ret = -EINVAL;
+ WARN_ON_ONCE(1);
+ goto out;
}
ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index b8e1ca48be50..00867ff82412 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2427,7 +2427,7 @@ rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
if (unlikely(info->add_timestamp)) {
bool abs = ring_buffer_time_stamp_abs(cpu_buffer->buffer);
- event = rb_add_time_stamp(event, info->delta, abs);
+ event = rb_add_time_stamp(event, abs ? info->delta : delta, abs);
length -= RB_LEN_TIME_EXTEND;
delta = 0;
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ec44b0e2a19c..bb62269724d5 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3570,7 +3570,6 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos)
void tracing_iter_reset(struct trace_iterator *iter, int cpu)
{
- struct ring_buffer_event *event;
struct ring_buffer_iter *buf_iter;
unsigned long entries = 0;
u64 ts;
@@ -3588,7 +3587,7 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu)
* that a reset never took place on a cpu. This is evident
* by the timestamp being before the start of the buffer.
*/
- while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
+ while (ring_buffer_iter_peek(buf_iter, &ts)) {
if (ts >= iter->array_buffer->time_start)
break;
entries++;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index def769df5bf1..13db4000af3f 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -61,6 +61,9 @@ enum trace_type {
#undef __field_desc
#define __field_desc(type, container, item)
+#undef __field_packed
+#define __field_packed(type, container, item)
+
#undef __array
#define __array(type, item, size) type item[size];
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index 9de29bb45a27..fa0fc08c6ef8 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -101,12 +101,16 @@ trace_boot_add_kprobe_event(struct xbc_node *node, const char *event)
kprobe_event_cmd_init(&cmd, buf, MAX_BUF_LEN);
ret = kprobe_event_gen_cmd_start(&cmd, event, val);
- if (ret)
+ if (ret) {
+ pr_err("Failed to generate probe: %s\n", buf);
break;
+ }
ret = kprobe_event_gen_cmd_end(&cmd);
- if (ret)
+ if (ret) {
pr_err("Failed to add probe: %s\n", buf);
+ break;
+ }
}
return ret;
@@ -120,7 +124,7 @@ trace_boot_add_kprobe_event(struct xbc_node *node, const char *event)
}
#endif
-#ifdef CONFIG_HIST_TRIGGERS
+#ifdef CONFIG_SYNTH_EVENTS
static int __init
trace_boot_add_synth_event(struct xbc_node *node, const char *event)
{
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index a523da0dae0a..18c4a58aff79 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -78,8 +78,8 @@ FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
F_STRUCT(
__field_struct( struct ftrace_graph_ent, graph_ent )
- __field_desc( unsigned long, graph_ent, func )
- __field_desc( int, graph_ent, depth )
+ __field_packed( unsigned long, graph_ent, func )
+ __field_packed( int, graph_ent, depth )
),
F_printk("--> %ps (%d)", (void *)__entry->func, __entry->depth)
@@ -92,11 +92,11 @@ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
F_STRUCT(
__field_struct( struct ftrace_graph_ret, ret )
- __field_desc( unsigned long, ret, func )
- __field_desc( unsigned long, ret, overrun )
- __field_desc( unsigned long long, ret, calltime)
- __field_desc( unsigned long long, ret, rettime )
- __field_desc( int, ret, depth )
+ __field_packed( unsigned long, ret, func )
+ __field_packed( unsigned long, ret, overrun )
+ __field_packed( unsigned long long, ret, calltime)
+ __field_packed( unsigned long long, ret, rettime )
+ __field_packed( int, ret, depth )
),
F_printk("<-- %ps (%d) (start: %llx end: %llx) over: %d",
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 3a74736da363..f725802160c0 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -216,11 +216,17 @@ static int event_trigger_regex_open(struct inode *inode, struct file *file)
int trigger_process_regex(struct trace_event_file *file, char *buff)
{
- char *command, *next = buff;
+ char *command, *next;
struct event_command *p;
int ret = -EINVAL;
+ next = buff = skip_spaces(buff);
command = strsep(&next, ": \t");
+ if (next) {
+ next = skip_spaces(next);
+ if (!*next)
+ next = NULL;
+ }
command = (command[0] != '!') ? command : command + 1;
mutex_lock(&trigger_cmd_mutex);
@@ -630,8 +636,14 @@ event_trigger_callback(struct event_command *cmd_ops,
int ret;
/* separate the trigger from the filter (t:n [if filter]) */
- if (param && isdigit(param[0]))
+ if (param && isdigit(param[0])) {
trigger = strsep(&param, " \t");
+ if (param) {
+ param = skip_spaces(param);
+ if (!*param)
+ param = NULL;
+ }
+ }
trigger_ops = cmd_ops->get_trigger_ops(cmd, trigger);
@@ -1368,6 +1380,11 @@ int event_enable_trigger_func(struct event_command *cmd_ops,
trigger = strsep(&param, " \t");
if (!trigger)
return -EINVAL;
+ if (param) {
+ param = skip_spaces(param);
+ if (!*param)
+ param = NULL;
+ }
system = strsep(&trigger, ":");
if (!trigger)
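
The trigger-parsing hunks above all add the same normalization step after strsep(): skip leading whitespace in the remainder and treat a whitespace-only remainder as absent. A condensed sketch of that step using the kernel's strsep() and skip_spaces(); the helper name is invented.

	#include <linux/string.h>	/* strsep(), skip_spaces() */

	static char *split_command(char **next)
	{
		char *cmd = strsep(next, ": \t");

		if (*next) {
			*next = skip_spaces(*next);
			if (!**next)		/* "cmd   " -> no trailing argument */
				*next = NULL;
		}
		return cmd;
	}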
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 77ce5a3b6773..70d3d0a09053 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -45,6 +45,9 @@ static int ftrace_event_register(struct trace_event_call *call,
#undef __field_desc
#define __field_desc(type, container, item) type item;
+#undef __field_packed
+#define __field_packed(type, container, item) type item;
+
#undef __array
#define __array(type, item, size) type item[size];
@@ -85,6 +88,13 @@ static void __always_unused ____ftrace_check_##name(void) \
.size = sizeof(_type), .align = __alignof__(_type), \
is_signed_type(_type), .filter_type = _filter_type },
+
+#undef __field_ext_packed
+#define __field_ext_packed(_type, _item, _filter_type) { \
+ .type = #_type, .name = #_item, \
+ .size = sizeof(_type), .align = 1, \
+ is_signed_type(_type), .filter_type = _filter_type },
+
#undef __field
#define __field(_type, _item) __field_ext(_type, _item, FILTER_OTHER)
@@ -94,6 +104,9 @@ static void __always_unused ____ftrace_check_##name(void) \
#undef __field_desc
#define __field_desc(_type, _container, _item) __field_ext(_type, _item, FILTER_OTHER)
+#undef __field_packed
+#define __field_packed(_type, _container, _item) __field_ext_packed(_type, _item, FILTER_OTHER)
+
#undef __array
#define __array(_type, _item, _len) { \
.type = #_type"["__stringify(_len)"]", .name = #_item, \
@@ -129,6 +142,9 @@ static struct trace_event_fields ftrace_event_fields_##name[] = { \
#undef __field_desc
#define __field_desc(type, container, item)
+#undef __field_packed
+#define __field_packed(type, container, item)
+
#undef __array
#define __array(type, item, len)
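
trace_entries.h and trace_export.c rely on an X-macro scheme: the same field list is expanded several times under different definitions of __field_desc/__field_packed (the packed variant now reporting .align = 1). A generic, self-contained userspace illustration of the technique with invented names, not the kernel's actual macros:

	#include <stdio.h>

	#define EVENT_FIELDS(F)			\
		F(unsigned long, func)		\
		F(int,           depth)

	/* Expansion 1: packed struct members. */
	#define AS_MEMBER(type, name)	type name;
	struct my_entry { EVENT_FIELDS(AS_MEMBER) } __attribute__((packed));

	/* Expansion 2: field metadata, analogous to trace_event_fields with align 1. */
	#define AS_DESC(type, name)	{ #type, #name, sizeof(type), 1 },
	static const struct field_desc {
		const char *type, *name;
		unsigned int size, align;
	} my_entry_fields[] = { EVENT_FIELDS(AS_DESC) };

	int main(void)
	{
		unsigned int i;

		for (i = 0; i < sizeof(my_entry_fields) / sizeof(my_entry_fields[0]); i++)
			printf("%-14s %-6s size=%u align=%u\n",
			       my_entry_fields[i].type, my_entry_fields[i].name,
			       my_entry_fields[i].size, my_entry_fields[i].align);
		printf("sizeof(struct my_entry) = %zu\n", sizeof(struct my_entry));
		return 0;
	}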
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 8a4c8d5c2c98..dd4dff71d89a 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -42,7 +42,7 @@ static int allocate_ftrace_ops(struct trace_array *tr)
if (!ops)
return -ENOMEM;
- /* Currently only the non stack verision is supported */
+ /* Currently only the non stack version is supported */
ops->func = function_trace_call;
ops->flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_PID;
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 6048f1be26d2..aefb6065b508 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1222,7 +1222,7 @@ fetch_store_strlen(unsigned long addr)
#endif
do {
- ret = probe_kernel_read(&c, (u8 *)addr + len, 1);
+ ret = copy_from_kernel_nofault(&c, (u8 *)addr + len, 1);
len++;
} while (c && ret == 0 && len < MAX_STRING_SIZE);
@@ -1290,7 +1290,7 @@ probe_mem_read_user(void *dest, void *src, size_t size)
{
const void __user *uaddr = (__force const void __user *)src;
- return probe_user_read(dest, uaddr, size);
+ return copy_from_user_nofault(dest, uaddr, size);
}
static nokprobe_inline int
@@ -1300,7 +1300,7 @@ probe_mem_read(void *dest, void *src, size_t size)
if ((unsigned long)src < TASK_SIZE)
return probe_mem_read_user(dest, src, size);
#endif
- return probe_kernel_read(dest, src, size);
+ return copy_from_kernel_nofault(dest, src, size);
}
/* Note that we don't verify it, since the code does not come from user space */
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index b8a928e925c7..d2867ccc6aca 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -639,8 +639,8 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size,
ret = -EINVAL;
goto fail;
}
- if ((code->op == FETCH_OP_IMM || code->op == FETCH_OP_COMM) ||
- parg->count) {
+ if ((code->op == FETCH_OP_IMM || code->op == FETCH_OP_COMM ||
+ code->op == FETCH_OP_DATA) || parg->count) {
/*
* IMM, DATA and COMM is pointing actual address, those
* must be kept, and if parg->count != 0, this is an
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index a0ff9e200ef6..a22b62813f8c 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -236,7 +236,7 @@ struct trace_probe_event {
struct trace_event_call call;
struct list_head files;
struct list_head probes;
- struct trace_uprobe_filter filter[0];
+ struct trace_uprobe_filter filter[];
};
struct trace_probe {
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 9fbe1e237563..c41c3c17b86a 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -4638,11 +4638,11 @@ void print_worker_info(const char *log_lvl, struct task_struct *task)
* Carefully copy the associated workqueue's workfn, name and desc.
* Keep the original last '\0' in case the original is garbage.
*/
- probe_kernel_read(&fn, &worker->current_func, sizeof(fn));
- probe_kernel_read(&pwq, &worker->current_pwq, sizeof(pwq));
- probe_kernel_read(&wq, &pwq->wq, sizeof(wq));
- probe_kernel_read(name, wq->name, sizeof(name) - 1);
- probe_kernel_read(desc, worker->desc, sizeof(desc) - 1);
+ copy_from_kernel_nofault(&fn, &worker->current_func, sizeof(fn));
+ copy_from_kernel_nofault(&pwq, &worker->current_pwq, sizeof(pwq));
+ copy_from_kernel_nofault(&wq, &pwq->wq, sizeof(wq));
+ copy_from_kernel_nofault(name, wq->name, sizeof(name) - 1);
+ copy_from_kernel_nofault(desc, worker->desc, sizeof(desc) - 1);
if (fn || name[0] || desc[0]) {
printk("%sWorkqueue: %s %ps", log_lvl, name, fn);