aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorTakashi Iwai <tiwai@suse.de>2019-05-06 16:14:09 +0200
committerTakashi Iwai <tiwai@suse.de>2019-05-06 16:14:34 +0200
commitd81645510ce2a140816c4cb37c45b78d810ca63f (patch)
treeedd9464900904d22a23da362bb152669480c5d26 /kernel
parentALSA: synth: emux: soundfont.c: divide by zero in calc_gus_envelope_time() (diff)
parentMerge branch 'asoc-5.2' into asoc-next (diff)
Merge tag 'asoc-v5.2' of https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
ASoC: Updates for v5.2 This is a pretty huge set of changes, it's been a pretty active release all round but the big thing with this release is the Sound Open Firmware changes from Intel, providing another DSP framework for use with the DSPs in their SoCs. This one works with the firmware of the same name which is free software (unlike the previous DSP firmwares and framework) and there has been some interest in adoption by other systems already so hopefully we will see adoption by other vendors in the future. Other highlights include: - Support for MCLK/sample rate ratio setting in the generic cards. - Support for pin switches in the generic cards. - A big set of improvements to the TLV320AIC32x4 drivers from Annaliese McDermond. - New drivers for Freescale audio mixers, several Intel machines, several Mediatek machines, Meson G12A, Sound Open Firmware and Spreadtrum compressed audio and DMA devices.
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/cpumap.c13
-rw-r--r--kernel/bpf/inode.c32
-rw-r--r--kernel/bpf/syscall.c22
-rw-r--r--kernel/bpf/verifier.c235
-rw-r--r--kernel/cpu.c20
-rw-r--r--kernel/dma/debug.c2
-rw-r--r--kernel/events/core.c91
-rw-r--r--kernel/events/ring_buffer.c40
-rw-r--r--kernel/futex.c4
-rw-r--r--kernel/irq/chip.c4
-rw-r--r--kernel/irq/devres.c2
-rw-r--r--kernel/irq/irqdesc.c1
-rw-r--r--kernel/irq/manage.c1
-rw-r--r--kernel/kprobes.c6
-rw-r--r--kernel/locking/lockdep.c20
-rw-r--r--kernel/ptrace.c15
-rw-r--r--kernel/sched/core.c2
-rw-r--r--kernel/sched/cpufreq_schedutil.c60
-rw-r--r--kernel/sched/deadline.c3
-rw-r--r--kernel/sched/fair.c119
-rw-r--r--kernel/seccomp.c19
-rw-r--r--kernel/signal.c15
-rw-r--r--kernel/sysctl.c3
-rw-r--r--kernel/time/alarmtimer.c2
-rw-r--r--kernel/time/jiffies.c2
-rw-r--r--kernel/time/sched_clock.c4
-rw-r--r--kernel/time/tick-common.c2
-rw-r--r--kernel/time/timekeeping.h7
-rw-r--r--kernel/trace/ftrace.c18
-rw-r--r--kernel/trace/ring_buffer.c2
-rw-r--r--kernel/trace/trace.c39
-rw-r--r--kernel/trace/trace_dynevent.c2
-rw-r--r--kernel/trace/trace_events_hist.c1
-rw-r--r--kernel/trace/trace_syscalls.c9
-rw-r--r--kernel/watchdog.c10
-rw-r--r--kernel/watchdog_hld.c3
-rw-r--r--kernel/workqueue.c5
37 files changed, 531 insertions, 304 deletions
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 8974b3755670..3c18260403dd 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -162,10 +162,14 @@ static void cpu_map_kthread_stop(struct work_struct *work)
static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
struct xdp_frame *xdpf)
{
+ unsigned int hard_start_headroom;
unsigned int frame_size;
void *pkt_data_start;
struct sk_buff *skb;
+ /* Part of headroom was reserved to xdpf */
+ hard_start_headroom = sizeof(struct xdp_frame) + xdpf->headroom;
+
/* build_skb need to place skb_shared_info after SKB end, and
* also want to know the memory "truesize". Thus, need to
* know the memory frame size backing xdp_buff.
@@ -183,15 +187,15 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
* is not at a fixed memory location, with mixed length
* packets, which is bad for cache-line hotness.
*/
- frame_size = SKB_DATA_ALIGN(xdpf->len + xdpf->headroom) +
+ frame_size = SKB_DATA_ALIGN(xdpf->len + hard_start_headroom) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- pkt_data_start = xdpf->data - xdpf->headroom;
+ pkt_data_start = xdpf->data - hard_start_headroom;
skb = build_skb(pkt_data_start, frame_size);
if (!skb)
return NULL;
- skb_reserve(skb, xdpf->headroom);
+ skb_reserve(skb, hard_start_headroom);
__skb_put(skb, xdpf->len);
if (xdpf->metasize)
skb_metadata_set(skb, xdpf->metasize);
@@ -205,6 +209,9 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
* - RX ring dev queue index (skb_record_rx_queue)
*/
+ /* Allow SKB to reuse area used by xdp_frame */
+ xdp_scrub_frame(xdpf);
+
return skb;
}
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 2ada5e21dfa6..4a8f390a2b82 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -554,19 +554,6 @@ struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type typ
}
EXPORT_SYMBOL(bpf_prog_get_type_path);
-static void bpf_evict_inode(struct inode *inode)
-{
- enum bpf_type type;
-
- truncate_inode_pages_final(&inode->i_data);
- clear_inode(inode);
-
- if (S_ISLNK(inode->i_mode))
- kfree(inode->i_link);
- if (!bpf_inode_type(inode, &type))
- bpf_any_put(inode->i_private, type);
-}
-
/*
* Display the mount options in /proc/mounts.
*/
@@ -579,11 +566,28 @@ static int bpf_show_options(struct seq_file *m, struct dentry *root)
return 0;
}
+static void bpf_destroy_inode_deferred(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ enum bpf_type type;
+
+ if (S_ISLNK(inode->i_mode))
+ kfree(inode->i_link);
+ if (!bpf_inode_type(inode, &type))
+ bpf_any_put(inode->i_private, type);
+ free_inode_nonrcu(inode);
+}
+
+static void bpf_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, bpf_destroy_inode_deferred);
+}
+
static const struct super_operations bpf_super_ops = {
.statfs = simple_statfs,
.drop_inode = generic_delete_inode,
.show_options = bpf_show_options,
- .evict_inode = bpf_evict_inode,
+ .destroy_inode = bpf_destroy_inode,
};
enum {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 62f6bced3a3c..afca36f53c49 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -136,21 +136,29 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
void *bpf_map_area_alloc(size_t size, int numa_node)
{
- /* We definitely need __GFP_NORETRY, so OOM killer doesn't
- * trigger under memory pressure as we really just want to
- * fail instead.
+ /* We really just want to fail instead of triggering OOM killer
+ * under memory pressure, therefore we set __GFP_NORETRY to kmalloc,
+ * which is used for lower order allocation requests.
+ *
+ * It has been observed that higher order allocation requests done by
+ * vmalloc with __GFP_NORETRY being set might fail due to not trying
+ * to reclaim memory from the page cache, thus we set
+ * __GFP_RETRY_MAYFAIL to avoid such situations.
*/
- const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
+
+ const gfp_t flags = __GFP_NOWARN | __GFP_ZERO;
void *area;
if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
- area = kmalloc_node(size, GFP_USER | flags, numa_node);
+ area = kmalloc_node(size, GFP_USER | __GFP_NORETRY | flags,
+ numa_node);
if (area != NULL)
return area;
}
- return __vmalloc_node_flags_caller(size, numa_node, GFP_KERNEL | flags,
- __builtin_return_address(0));
+ return __vmalloc_node_flags_caller(size, numa_node,
+ GFP_KERNEL | __GFP_RETRY_MAYFAIL |
+ flags, __builtin_return_address(0));
}
void bpf_map_area_free(void *area)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index ce166a002d16..09d5d972c9ff 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -212,7 +212,7 @@ struct bpf_call_arg_meta {
int access_size;
s64 msize_smax_value;
u64 msize_umax_value;
- int ptr_id;
+ int ref_obj_id;
int func_id;
};
@@ -346,35 +346,23 @@ static bool reg_type_may_be_null(enum bpf_reg_type type)
type == PTR_TO_TCP_SOCK_OR_NULL;
}
-static bool type_is_refcounted(enum bpf_reg_type type)
-{
- return type == PTR_TO_SOCKET;
-}
-
-static bool type_is_refcounted_or_null(enum bpf_reg_type type)
-{
- return type == PTR_TO_SOCKET || type == PTR_TO_SOCKET_OR_NULL;
-}
-
-static bool reg_is_refcounted(const struct bpf_reg_state *reg)
-{
- return type_is_refcounted(reg->type);
-}
-
static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
{
return reg->type == PTR_TO_MAP_VALUE &&
map_value_has_spin_lock(reg->map_ptr);
}
-static bool reg_is_refcounted_or_null(const struct bpf_reg_state *reg)
+static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
{
- return type_is_refcounted_or_null(reg->type);
+ return type == PTR_TO_SOCKET ||
+ type == PTR_TO_SOCKET_OR_NULL ||
+ type == PTR_TO_TCP_SOCK ||
+ type == PTR_TO_TCP_SOCK_OR_NULL;
}
-static bool arg_type_is_refcounted(enum bpf_arg_type type)
+static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
{
- return type == ARG_PTR_TO_SOCKET;
+ return type == ARG_PTR_TO_SOCK_COMMON;
}
/* Determine whether the function releases some resources allocated by another
@@ -392,6 +380,12 @@ static bool is_acquire_function(enum bpf_func_id func_id)
func_id == BPF_FUNC_sk_lookup_udp;
}
+static bool is_ptr_cast_function(enum bpf_func_id func_id)
+{
+ return func_id == BPF_FUNC_tcp_sock ||
+ func_id == BPF_FUNC_sk_fullsock;
+}
+
/* string representation of 'enum bpf_reg_type' */
static const char * const reg_type_str[] = {
[NOT_INIT] = "?",
@@ -466,6 +460,8 @@ static void print_verifier_state(struct bpf_verifier_env *env,
verbose(env, ",call_%d", func(env, reg)->callsite);
} else {
verbose(env, "(id=%d", reg->id);
+ if (reg_type_may_be_refcounted_or_null(t))
+ verbose(env, ",ref_obj_id=%d", reg->ref_obj_id);
if (t != SCALAR_VALUE)
verbose(env, ",off=%d", reg->off);
if (type_is_pkt_pointer(t))
@@ -1901,8 +1897,9 @@ continue_func:
}
frame++;
if (frame >= MAX_CALL_FRAMES) {
- WARN_ONCE(1, "verifier bug. Call stack is too deep\n");
- return -EFAULT;
+ verbose(env, "the call stack of %d frames is too deep !\n",
+ frame);
+ return -E2BIG;
}
goto process_func;
}
@@ -2414,16 +2411,15 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
/* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */
if (!type_is_sk_pointer(type))
goto err_type;
- } else if (arg_type == ARG_PTR_TO_SOCKET) {
- expected_type = PTR_TO_SOCKET;
- if (type != expected_type)
- goto err_type;
- if (meta->ptr_id || !reg->id) {
- verbose(env, "verifier internal error: mismatched references meta=%d, reg=%d\n",
- meta->ptr_id, reg->id);
- return -EFAULT;
+ if (reg->ref_obj_id) {
+ if (meta->ref_obj_id) {
+ verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
+ regno, reg->ref_obj_id,
+ meta->ref_obj_id);
+ return -EFAULT;
+ }
+ meta->ref_obj_id = reg->ref_obj_id;
}
- meta->ptr_id = reg->id;
} else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
if (meta->func_id == BPF_FUNC_spin_lock) {
if (process_spin_lock(env, regno, true))
@@ -2740,32 +2736,38 @@ static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
return true;
}
-static bool check_refcount_ok(const struct bpf_func_proto *fn)
+static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
{
int count = 0;
- if (arg_type_is_refcounted(fn->arg1_type))
+ if (arg_type_may_be_refcounted(fn->arg1_type))
count++;
- if (arg_type_is_refcounted(fn->arg2_type))
+ if (arg_type_may_be_refcounted(fn->arg2_type))
count++;
- if (arg_type_is_refcounted(fn->arg3_type))
+ if (arg_type_may_be_refcounted(fn->arg3_type))
count++;
- if (arg_type_is_refcounted(fn->arg4_type))
+ if (arg_type_may_be_refcounted(fn->arg4_type))
count++;
- if (arg_type_is_refcounted(fn->arg5_type))
+ if (arg_type_may_be_refcounted(fn->arg5_type))
count++;
+ /* A reference acquiring function cannot acquire
+ * another refcounted ptr.
+ */
+ if (is_acquire_function(func_id) && count)
+ return false;
+
/* We only support one arg being unreferenced at the moment,
* which is sufficient for the helper functions we have right now.
*/
return count <= 1;
}
-static int check_func_proto(const struct bpf_func_proto *fn)
+static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
{
return check_raw_mode_ok(fn) &&
check_arg_pair_ok(fn) &&
- check_refcount_ok(fn) ? 0 : -EINVAL;
+ check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
}
/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
@@ -2799,19 +2801,20 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
}
static void release_reg_references(struct bpf_verifier_env *env,
- struct bpf_func_state *state, int id)
+ struct bpf_func_state *state,
+ int ref_obj_id)
{
struct bpf_reg_state *regs = state->regs, *reg;
int i;
for (i = 0; i < MAX_BPF_REG; i++)
- if (regs[i].id == id)
+ if (regs[i].ref_obj_id == ref_obj_id)
mark_reg_unknown(env, regs, i);
bpf_for_each_spilled_reg(i, state, reg) {
if (!reg)
continue;
- if (reg_is_refcounted(reg) && reg->id == id)
+ if (reg->ref_obj_id == ref_obj_id)
__mark_reg_unknown(reg);
}
}
@@ -2820,15 +2823,20 @@ static void release_reg_references(struct bpf_verifier_env *env,
* resources. Identify all copies of the same pointer and clear the reference.
*/
static int release_reference(struct bpf_verifier_env *env,
- struct bpf_call_arg_meta *meta)
+ int ref_obj_id)
{
struct bpf_verifier_state *vstate = env->cur_state;
+ int err;
int i;
+ err = release_reference_state(cur_func(env), ref_obj_id);
+ if (err)
+ return err;
+
for (i = 0; i <= vstate->curframe; i++)
- release_reg_references(env, vstate->frame[i], meta->ptr_id);
+ release_reg_references(env, vstate->frame[i], ref_obj_id);
- return release_reference_state(cur_func(env), meta->ptr_id);
+ return 0;
}
static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
@@ -3047,7 +3055,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
memset(&meta, 0, sizeof(meta));
meta.pkt_access = fn->pkt_access;
- err = check_func_proto(fn);
+ err = check_func_proto(fn, func_id);
if (err) {
verbose(env, "kernel subsystem misconfigured func %s#%d\n",
func_id_name(func_id), func_id);
@@ -3093,7 +3101,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
return err;
}
} else if (is_release_function(func_id)) {
- err = release_reference(env, &meta);
+ err = release_reference(env, meta.ref_obj_id);
if (err) {
verbose(env, "func %s#%d reference has not been acquired before\n",
func_id_name(func_id), func_id);
@@ -3154,8 +3162,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
if (id < 0)
return id;
- /* For release_reference() */
+ /* For mark_ptr_or_null_reg() */
regs[BPF_REG_0].id = id;
+ /* For release_reference() */
+ regs[BPF_REG_0].ref_obj_id = id;
} else {
/* For mark_ptr_or_null_reg() */
regs[BPF_REG_0].id = ++env->id_gen;
@@ -3170,6 +3180,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
return -EINVAL;
}
+ if (is_ptr_cast_function(func_id))
+ /* For release_reference() */
+ regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
+
do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
err = check_map_func_compatibility(env, meta.map_ptr, func_id);
@@ -3368,7 +3382,7 @@ do_sim:
*dst_reg = *ptr_reg;
}
ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true);
- if (!ptr_is_dst_reg)
+ if (!ptr_is_dst_reg && ret)
*dst_reg = tmp;
return !ret ? -EFAULT : 0;
}
@@ -4124,15 +4138,35 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
return 0;
}
+static void __find_good_pkt_pointers(struct bpf_func_state *state,
+ struct bpf_reg_state *dst_reg,
+ enum bpf_reg_type type, u16 new_range)
+{
+ struct bpf_reg_state *reg;
+ int i;
+
+ for (i = 0; i < MAX_BPF_REG; i++) {
+ reg = &state->regs[i];
+ if (reg->type == type && reg->id == dst_reg->id)
+ /* keep the maximum range already checked */
+ reg->range = max(reg->range, new_range);
+ }
+
+ bpf_for_each_spilled_reg(i, state, reg) {
+ if (!reg)
+ continue;
+ if (reg->type == type && reg->id == dst_reg->id)
+ reg->range = max(reg->range, new_range);
+ }
+}
+
static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
struct bpf_reg_state *dst_reg,
enum bpf_reg_type type,
bool range_right_open)
{
- struct bpf_func_state *state = vstate->frame[vstate->curframe];
- struct bpf_reg_state *regs = state->regs, *reg;
u16 new_range;
- int i, j;
+ int i;
if (dst_reg->off < 0 ||
(dst_reg->off == 0 && range_right_open))
@@ -4197,20 +4231,9 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
* the range won't allow anything.
* dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
*/
- for (i = 0; i < MAX_BPF_REG; i++)
- if (regs[i].type == type && regs[i].id == dst_reg->id)
- /* keep the maximum range already checked */
- regs[i].range = max(regs[i].range, new_range);
-
- for (j = 0; j <= vstate->curframe; j++) {
- state = vstate->frame[j];
- bpf_for_each_spilled_reg(i, state, reg) {
- if (!reg)
- continue;
- if (reg->type == type && reg->id == dst_reg->id)
- reg->range = max(reg->range, new_range);
- }
- }
+ for (i = 0; i <= vstate->curframe; i++)
+ __find_good_pkt_pointers(vstate->frame[i], dst_reg, type,
+ new_range);
}
/* compute branch direction of the expression "if (reg opcode val) goto target;"
@@ -4665,17 +4688,41 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
} else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
reg->type = PTR_TO_TCP_SOCK;
}
- if (is_null || !(reg_is_refcounted(reg) ||
- reg_may_point_to_spin_lock(reg))) {
- /* We don't need id from this point onwards anymore,
- * thus we should better reset it, so that state
- * pruning has chances to take effect.
+ if (is_null) {
+ /* We don't need id and ref_obj_id from this point
+ * onwards anymore, thus we should better reset it,
+ * so that state pruning has chances to take effect.
+ */
+ reg->id = 0;
+ reg->ref_obj_id = 0;
+ } else if (!reg_may_point_to_spin_lock(reg)) {
+ /* For not-NULL ptr, reg->ref_obj_id will be reset
+ * in release_reg_references().
+ *
+ * reg->id is still used by spin_lock ptr. Other
+ * than spin_lock ptr type, reg->id can be reset.
*/
reg->id = 0;
}
}
}
+static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id,
+ bool is_null)
+{
+ struct bpf_reg_state *reg;
+ int i;
+
+ for (i = 0; i < MAX_BPF_REG; i++)
+ mark_ptr_or_null_reg(state, &state->regs[i], id, is_null);
+
+ bpf_for_each_spilled_reg(i, state, reg) {
+ if (!reg)
+ continue;
+ mark_ptr_or_null_reg(state, reg, id, is_null);
+ }
+}
+
/* The logic is similar to find_good_pkt_pointers(), both could eventually
* be folded together at some point.
*/
@@ -4683,24 +4730,20 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
bool is_null)
{
struct bpf_func_state *state = vstate->frame[vstate->curframe];
- struct bpf_reg_state *reg, *regs = state->regs;
+ struct bpf_reg_state *regs = state->regs;
+ u32 ref_obj_id = regs[regno].ref_obj_id;
u32 id = regs[regno].id;
- int i, j;
-
- if (reg_is_refcounted_or_null(&regs[regno]) && is_null)
- release_reference_state(state, id);
+ int i;
- for (i = 0; i < MAX_BPF_REG; i++)
- mark_ptr_or_null_reg(state, &regs[i], id, is_null);
+ if (ref_obj_id && ref_obj_id == id && is_null)
+ /* regs[regno] is in the " == NULL" branch.
+ * No one could have freed the reference state before
+ * doing the NULL check.
+ */
+ WARN_ON_ONCE(release_reference_state(state, id));
- for (j = 0; j <= vstate->curframe; j++) {
- state = vstate->frame[j];
- bpf_for_each_spilled_reg(i, state, reg) {
- if (!reg)
- continue;
- mark_ptr_or_null_reg(state, reg, id, is_null);
- }
- }
+ for (i = 0; i <= vstate->curframe; i++)
+ __mark_ptr_or_null_regs(vstate->frame[i], id, is_null);
}
static bool try_match_pkt_pointers(const struct bpf_insn *insn,
@@ -6052,15 +6095,17 @@ static int propagate_liveness(struct bpf_verifier_env *env,
}
/* Propagate read liveness of registers... */
BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
- /* We don't need to worry about FP liveness because it's read-only */
- for (i = 0; i < BPF_REG_FP; i++) {
- if (vparent->frame[vparent->curframe]->regs[i].live & REG_LIVE_READ)
- continue;
- if (vstate->frame[vstate->curframe]->regs[i].live & REG_LIVE_READ) {
- err = mark_reg_read(env, &vstate->frame[vstate->curframe]->regs[i],
- &vparent->frame[vstate->curframe]->regs[i]);
- if (err)
- return err;
+ for (frame = 0; frame <= vstate->curframe; frame++) {
+ /* We don't need to worry about FP liveness, it's read-only */
+ for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
+ if (vparent->frame[frame]->regs[i].live & REG_LIVE_READ)
+ continue;
+ if (vstate->frame[frame]->regs[i].live & REG_LIVE_READ) {
+ err = mark_reg_read(env, &vstate->frame[frame]->regs[i],
+ &vparent->frame[frame]->regs[i]);
+ if (err)
+ return err;
+ }
}
}
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 025f419d16f6..6754f3ecfd94 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -564,6 +564,20 @@ static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
}
+static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st)
+{
+ if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
+ return true;
+ /*
+ * When CPU hotplug is disabled, then taking the CPU down is not
+ * possible because takedown_cpu() and the architecture and
+ * subsystem specific mechanisms are not available. So the CPU
+ * which would be completely unplugged again needs to stay around
+ * in the current state.
+ */
+ return st->state <= CPUHP_BRINGUP_CPU;
+}
+
static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
enum cpuhp_state target)
{
@@ -574,8 +588,10 @@ static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
st->state++;
ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
if (ret) {
- st->target = prev_state;
- undo_cpu_up(cpu, st);
+ if (can_rollback_cpu(st)) {
+ st->target = prev_state;
+ undo_cpu_up(cpu, st);
+ }
break;
}
}
diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c
index 45d51e8e26f6..a218e43cc382 100644
--- a/kernel/dma/debug.c
+++ b/kernel/dma/debug.c
@@ -706,7 +706,7 @@ static struct dma_debug_entry *dma_entry_alloc(void)
#ifdef CONFIG_STACKTRACE
entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES;
entry->stacktrace.entries = entry->st_entries;
- entry->stacktrace.skip = 2;
+ entry->stacktrace.skip = 1;
save_stack_trace(&entry->stacktrace);
#endif
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1032a16bd186..dc7dead2d2cc 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2009,8 +2009,8 @@ event_sched_out(struct perf_event *event,
event->pmu->del(event, 0);
event->oncpu = -1;
- if (event->pending_disable) {
- event->pending_disable = 0;
+ if (READ_ONCE(event->pending_disable) >= 0) {
+ WRITE_ONCE(event->pending_disable, -1);
state = PERF_EVENT_STATE_OFF;
}
perf_event_set_state(event, state);
@@ -2198,7 +2198,8 @@ EXPORT_SYMBOL_GPL(perf_event_disable);
void perf_event_disable_inatomic(struct perf_event *event)
{
- event->pending_disable = 1;
+ WRITE_ONCE(event->pending_disable, smp_processor_id());
+ /* can fail, see perf_pending_event_disable() */
irq_work_queue(&event->pending);
}
@@ -5810,10 +5811,45 @@ void perf_event_wakeup(struct perf_event *event)
}
}
+static void perf_pending_event_disable(struct perf_event *event)
+{
+ int cpu = READ_ONCE(event->pending_disable);
+
+ if (cpu < 0)
+ return;
+
+ if (cpu == smp_processor_id()) {
+ WRITE_ONCE(event->pending_disable, -1);
+ perf_event_disable_local(event);
+ return;
+ }
+
+ /*
+ * CPU-A CPU-B
+ *
+ * perf_event_disable_inatomic()
+ * @pending_disable = CPU-A;
+ * irq_work_queue();
+ *
+ * sched-out
+ * @pending_disable = -1;
+ *
+ * sched-in
+ * perf_event_disable_inatomic()
+ * @pending_disable = CPU-B;
+ * irq_work_queue(); // FAILS
+ *
+ * irq_work_run()
+ * perf_pending_event()
+ *
+ * But the event runs on CPU-B and wants disabling there.
+ */
+ irq_work_queue_on(&event->pending, cpu);
+}
+
static void perf_pending_event(struct irq_work *entry)
{
- struct perf_event *event = container_of(entry,
- struct perf_event, pending);
+ struct perf_event *event = container_of(entry, struct perf_event, pending);
int rctx;
rctx = perf_swevent_get_recursion_context();
@@ -5822,10 +5858,7 @@ static void perf_pending_event(struct irq_work *entry)
* and we won't recurse 'further'.
*/
- if (event->pending_disable) {
- event->pending_disable = 0;
- perf_event_disable_local(event);
- }
+ perf_pending_event_disable(event);
if (event->pending_wakeup) {
event->pending_wakeup = 0;
@@ -7189,6 +7222,7 @@ static void perf_event_mmap_output(struct perf_event *event,
struct perf_output_handle handle;
struct perf_sample_data sample;
int size = mmap_event->event_id.header.size;
+ u32 type = mmap_event->event_id.header.type;
int ret;
if (!perf_event_mmap_match(event, data))
@@ -7232,6 +7266,7 @@ static void perf_event_mmap_output(struct perf_event *event,
perf_output_end(&handle);
out:
mmap_event->event_id.header.size = size;
+ mmap_event->event_id.header.type = type;
}
static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
@@ -9042,26 +9077,29 @@ static void perf_event_addr_filters_apply(struct perf_event *event)
if (task == TASK_TOMBSTONE)
return;
- if (!ifh->nr_file_filters)
- return;
-
- mm = get_task_mm(event->ctx->task);
- if (!mm)
- goto restart;
+ if (ifh->nr_file_filters) {
+ mm = get_task_mm(event->ctx->task);
+ if (!mm)
+ goto restart;
- down_read(&mm->mmap_sem);
+ down_read(&mm->mmap_sem);
+ }
raw_spin_lock_irqsave(&ifh->lock, flags);
list_for_each_entry(filter, &ifh->list, entry) {
- event->addr_filter_ranges[count].start = 0;
- event->addr_filter_ranges[count].size = 0;
+ if (filter->path.dentry) {
+ /*
+ * Adjust base offset if the filter is associated to a
+ * binary that needs to be mapped:
+ */
+ event->addr_filter_ranges[count].start = 0;
+ event->addr_filter_ranges[count].size = 0;
- /*
- * Adjust base offset if the filter is associated to a binary
- * that needs to be mapped:
- */
- if (filter->path.dentry)
perf_addr_filter_apply(filter, mm, &event->addr_filter_ranges[count]);
+ } else {
+ event->addr_filter_ranges[count].start = filter->offset;
+ event->addr_filter_ranges[count].size = filter->size;
+ }
count++;
}
@@ -9069,9 +9107,11 @@ static void perf_event_addr_filters_apply(struct perf_event *event)
event->addr_filters_gen++;
raw_spin_unlock_irqrestore(&ifh->lock, flags);
- up_read(&mm->mmap_sem);
+ if (ifh->nr_file_filters) {
+ up_read(&mm->mmap_sem);
- mmput(mm);
+ mmput(mm);
+ }
restart:
perf_event_stop(event, 1);
@@ -10234,6 +10274,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
init_waitqueue_head(&event->waitq);
+ event->pending_disable = -1;
init_irq_work(&event->pending, perf_pending_event);
mutex_init(&event->mmap_mutex);
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index a4047321d7d8..674b35383491 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -392,7 +392,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
* store that will be enabled on successful return
*/
if (!handle->size) { /* A, matches D */
- event->pending_disable = 1;
+ event->pending_disable = smp_processor_id();
perf_output_wakeup(handle);
local_set(&rb->aux_nest, 0);
goto err_put;
@@ -455,24 +455,21 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
rb->aux_head += size;
}
- if (size || handle->aux_flags) {
- /*
- * Only send RECORD_AUX if we have something useful to communicate
- *
- * Note: the OVERWRITE records by themselves are not considered
- * useful, as they don't communicate any *new* information,
- * aside from the short-lived offset, that becomes history at
- * the next event sched-in and therefore isn't useful.
- * The userspace that needs to copy out AUX data in overwrite
- * mode should know to use user_page::aux_head for the actual
- * offset. So, from now on we don't output AUX records that
- * have *only* OVERWRITE flag set.
- */
-
- if (handle->aux_flags & ~(u64)PERF_AUX_FLAG_OVERWRITE)
- perf_event_aux_event(handle->event, aux_head, size,
- handle->aux_flags);
- }
+ /*
+ * Only send RECORD_AUX if we have something useful to communicate
+ *
+ * Note: the OVERWRITE records by themselves are not considered
+ * useful, as they don't communicate any *new* information,
+ * aside from the short-lived offset, that becomes history at
+ * the next event sched-in and therefore isn't useful.
+ * The userspace that needs to copy out AUX data in overwrite
+ * mode should know to use user_page::aux_head for the actual
+ * offset. So, from now on we don't output AUX records that
+ * have *only* OVERWRITE flag set.
+ */
+ if (size || (handle->aux_flags & ~(u64)PERF_AUX_FLAG_OVERWRITE))
+ perf_event_aux_event(handle->event, aux_head, size,
+ handle->aux_flags);
rb->user_page->aux_head = rb->aux_head;
if (rb_need_aux_wakeup(rb))
@@ -480,7 +477,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
if (wakeup) {
if (handle->aux_flags & PERF_AUX_FLAG_TRUNCATED)
- handle->event->pending_disable = 1;
+ handle->event->pending_disable = smp_processor_id();
perf_output_wakeup(handle);
}
@@ -613,8 +610,7 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
* PMU requests more than one contiguous chunks of memory
* for SW double buffering
*/
- if ((event->pmu->capabilities & PERF_PMU_CAP_AUX_SW_DOUBLEBUF) &&
- !overwrite) {
+ if (!overwrite) {
if (!max_order)
return -EINVAL;
diff --git a/kernel/futex.c b/kernel/futex.c
index c3b73b0311bc..9e40cf7be606 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -3436,6 +3436,10 @@ static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int p
{
u32 uval, uninitialized_var(nval), mval;
+ /* Futex address must be 32bit aligned */
+ if ((((unsigned long)uaddr) % sizeof(*uaddr)) != 0)
+ return -1;
+
retry:
if (get_user(uval, uaddr))
return -1;
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 3faef4a77f71..51128bea3846 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -1449,6 +1449,10 @@ int irq_chip_set_vcpu_affinity_parent(struct irq_data *data, void *vcpu_info)
int irq_chip_set_wake_parent(struct irq_data *data, unsigned int on)
{
data = data->parent_data;
+
+ if (data->chip->flags & IRQCHIP_SKIP_SET_WAKE)
+ return 0;
+
if (data->chip->irq_set_wake)
return data->chip->irq_set_wake(data, on);
diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c
index 5d5378ea0afe..f808c6a97dcc 100644
--- a/kernel/irq/devres.c
+++ b/kernel/irq/devres.c
@@ -84,8 +84,6 @@ EXPORT_SYMBOL(devm_request_threaded_irq);
* @dev: device to request interrupt for
* @irq: Interrupt line to allocate
* @handler: Function to be called when the IRQ occurs
- * @thread_fn: function to be called in a threaded interrupt context. NULL
- * for devices which handle everything in @handler
* @irqflags: Interrupt type flags
* @devname: An ascii name for the claiming device, dev_name(dev) if NULL
* @dev_id: A cookie passed back to the handler function
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 13539e12cd80..9f8a709337cf 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -558,6 +558,7 @@ int __init early_irq_init(void)
alloc_masks(&desc[i], node);
raw_spin_lock_init(&desc[i].lock);
lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
+ mutex_init(&desc[i].request_mutex);
desc_set_defaults(i, &desc[i], node, NULL, NULL);
}
return arch_early_irq_init();
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 9ec34a2a6638..1401afa0d58a 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -196,6 +196,7 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
case IRQ_SET_MASK_OK:
case IRQ_SET_MASK_OK_DONE:
cpumask_copy(desc->irq_common_data.affinity, mask);
+ /* fall through */
case IRQ_SET_MASK_OK_NOCOPY:
irq_validate_effective_affinity(data);
irq_set_thread_affinity(desc);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index c83e54727131..b1ea30a5540e 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -709,7 +709,6 @@ static void unoptimize_kprobe(struct kprobe *p, bool force)
static int reuse_unused_kprobe(struct kprobe *ap)
{
struct optimized_kprobe *op;
- int ret;
/*
* Unused kprobe MUST be on the way of delayed unoptimizing (means
@@ -720,9 +719,8 @@ static int reuse_unused_kprobe(struct kprobe *ap)
/* Enable the probe again */
ap->flags &= ~KPROBE_FLAG_DISABLED;
/* Optimize it again (remove from op->list) */
- ret = kprobe_optready(ap);
- if (ret)
- return ret;
+ if (!kprobe_optready(ap))
+ return -EINVAL;
optimize_kprobe(ap);
return 0;
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 34cdcbedda49..e221be724fe8 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -4689,8 +4689,8 @@ static void free_zapped_rcu(struct rcu_head *ch)
return;
raw_local_irq_save(flags);
- if (!graph_lock())
- goto out_irq;
+ arch_spin_lock(&lockdep_lock);
+ current->lockdep_recursion = 1;
/* closed head */
pf = delayed_free.pf + (delayed_free.index ^ 1);
@@ -4702,8 +4702,8 @@ static void free_zapped_rcu(struct rcu_head *ch)
*/
call_rcu_zapped(delayed_free.pf + delayed_free.index);
- graph_unlock();
-out_irq:
+ current->lockdep_recursion = 0;
+ arch_spin_unlock(&lockdep_lock);
raw_local_irq_restore(flags);
}
@@ -4744,21 +4744,17 @@ static void lockdep_free_key_range_reg(void *start, unsigned long size)
{
struct pending_free *pf;
unsigned long flags;
- int locked;
init_data_structures_once();
raw_local_irq_save(flags);
- locked = graph_lock();
- if (!locked)
- goto out_irq;
-
+ arch_spin_lock(&lockdep_lock);
+ current->lockdep_recursion = 1;
pf = get_pending_free();
__lockdep_free_key_range(pf, start, size);
call_rcu_zapped(pf);
-
- graph_unlock();
-out_irq:
+ current->lockdep_recursion = 0;
+ arch_spin_unlock(&lockdep_lock);
raw_local_irq_restore(flags);
/*
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 771e93f9c43f..6f357f4fc859 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -29,6 +29,7 @@
#include <linux/hw_breakpoint.h>
#include <linux/cn_proc.h>
#include <linux/compat.h>
+#include <linux/sched/signal.h>
/*
* Access another process' address space via ptrace.
@@ -924,18 +925,26 @@ int ptrace_request(struct task_struct *child, long request,
ret = ptrace_setsiginfo(child, &siginfo);
break;
- case PTRACE_GETSIGMASK:
+ case PTRACE_GETSIGMASK: {
+ sigset_t *mask;
+
if (addr != sizeof(sigset_t)) {
ret = -EINVAL;
break;
}
- if (copy_to_user(datavp, &child->blocked, sizeof(sigset_t)))
+ if (test_tsk_restore_sigmask(child))
+ mask = &child->saved_sigmask;
+ else
+ mask = &child->blocked;
+
+ if (copy_to_user(datavp, mask, sizeof(sigset_t)))
ret = -EFAULT;
else
ret = 0;
break;
+ }
case PTRACE_SETSIGMASK: {
sigset_t new_set;
@@ -961,6 +970,8 @@ int ptrace_request(struct task_struct *child, long request,
child->blocked = new_set;
spin_unlock_irq(&child->sighand->siglock);
+ clear_tsk_restore_sigmask(child);
+
ret = 0;
break;
}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ead464a0f2e5..4778c48a7fda 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6998,7 +6998,7 @@ static int __maybe_unused cpu_period_quota_parse(char *buf,
{
char tok[21]; /* U64_MAX */
- if (!sscanf(buf, "%s %llu", tok, periodp))
+ if (sscanf(buf, "%20s %llu", tok, periodp) < 1)
return -EINVAL;
*periodp *= NSEC_PER_USEC;
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 2efe629425be..3638d2377e3c 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -48,10 +48,10 @@ struct sugov_cpu {
bool iowait_boost_pending;
unsigned int iowait_boost;
- unsigned int iowait_boost_max;
u64 last_update;
unsigned long bw_dl;
+ unsigned long min;
unsigned long max;
/* The field below is for single-CPU policies only: */
@@ -303,8 +303,7 @@ static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time,
if (delta_ns <= TICK_NSEC)
return false;
- sg_cpu->iowait_boost = set_iowait_boost
- ? sg_cpu->sg_policy->policy->min : 0;
+ sg_cpu->iowait_boost = set_iowait_boost ? sg_cpu->min : 0;
sg_cpu->iowait_boost_pending = set_iowait_boost;
return true;
@@ -344,14 +343,13 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
/* Double the boost at each request */
if (sg_cpu->iowait_boost) {
- sg_cpu->iowait_boost <<= 1;
- if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max)
- sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
+ sg_cpu->iowait_boost =
+ min_t(unsigned int, sg_cpu->iowait_boost << 1, SCHED_CAPACITY_SCALE);
return;
}
/* First wakeup after IO: start with minimum boost */
- sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min;
+ sg_cpu->iowait_boost = sg_cpu->min;
}
/**
@@ -373,47 +371,38 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
* This mechanism is designed to boost high frequently IO waiting tasks, while
* being more conservative on tasks which does sporadic IO operations.
*/
-static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
- unsigned long *util, unsigned long *max)
+static unsigned long sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
+ unsigned long util, unsigned long max)
{
- unsigned int boost_util, boost_max;
+ unsigned long boost;
/* No boost currently required */
if (!sg_cpu->iowait_boost)
- return;
+ return util;
/* Reset boost if the CPU appears to have been idle enough */
if (sugov_iowait_reset(sg_cpu, time, false))
- return;
+ return util;
- /*
- * An IO waiting task has just woken up:
- * allow to further double the boost value
- */
- if (sg_cpu->iowait_boost_pending) {
- sg_cpu->iowait_boost_pending = false;
- } else {
+ if (!sg_cpu->iowait_boost_pending) {
/*
- * Otherwise: reduce the boost value and disable it when we
- * reach the minimum.
+ * No boost pending; reduce the boost value.
*/
sg_cpu->iowait_boost >>= 1;
- if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) {
+ if (sg_cpu->iowait_boost < sg_cpu->min) {
sg_cpu->iowait_boost = 0;
- return;
+ return util;
}
}
+ sg_cpu->iowait_boost_pending = false;
+
/*
- * Apply the current boost value: a CPU is boosted only if its current
- * utilization is smaller then the current IO boost level.
+ * @util is already in capacity scale; convert iowait_boost
+ * into the same scale so we can compare.
*/
- boost_util = sg_cpu->iowait_boost;
- boost_max = sg_cpu->iowait_boost_max;
- if (*util * boost_max < *max * boost_util) {
- *util = boost_util;
- *max = boost_max;
- }
+ boost = (sg_cpu->iowait_boost * max) >> SCHED_CAPACITY_SHIFT;
+ return max(boost, util);
}
#ifdef CONFIG_NO_HZ_COMMON
@@ -460,7 +449,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
util = sugov_get_util(sg_cpu);
max = sg_cpu->max;
- sugov_iowait_apply(sg_cpu, time, &util, &max);
+ util = sugov_iowait_apply(sg_cpu, time, util, max);
next_f = get_next_freq(sg_policy, util, max);
/*
* Do not reduce the frequency if the CPU has not been idle
@@ -500,7 +489,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
j_util = sugov_get_util(j_sg_cpu);
j_max = j_sg_cpu->max;
- sugov_iowait_apply(j_sg_cpu, time, &j_util, &j_max);
+ j_util = sugov_iowait_apply(j_sg_cpu, time, j_util, j_max);
if (j_util * max > j_max * util) {
util = j_util;
@@ -782,6 +771,7 @@ out:
return 0;
fail:
+ kobject_put(&tunables->attr_set.kobj);
policy->governor_data = NULL;
sugov_tunables_free(tunables);
@@ -837,7 +827,9 @@ static int sugov_start(struct cpufreq_policy *policy)
memset(sg_cpu, 0, sizeof(*sg_cpu));
sg_cpu->cpu = cpu;
sg_cpu->sg_policy = sg_policy;
- sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
+ sg_cpu->min =
+ (SCHED_CAPACITY_SCALE * policy->cpuinfo.min_freq) /
+ policy->cpuinfo.max_freq;
}
for_each_cpu(cpu, policy->cpus) {
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 6a73e41a2016..43901fa3f269 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -252,7 +252,6 @@ static void task_non_contending(struct task_struct *p)
if (dl_entity_is_special(dl_se))
return;
- WARN_ON(hrtimer_active(&dl_se->inactive_timer));
WARN_ON(dl_se->dl_non_contending);
zerolag_time = dl_se->deadline -
@@ -269,7 +268,7 @@ static void task_non_contending(struct task_struct *p)
* If the "0-lag time" already passed, decrease the active
* utilization now, instead of starting a timer
*/
- if (zerolag_time < 0) {
+ if ((zerolag_time < 0) || hrtimer_active(&dl_se->inactive_timer)) {
if (dl_task(p))
sub_running_bw(dl_se, dl_rq);
if (!dl_task(p) || p->state == TASK_DEAD) {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ea74d43924b2..35f3ea375084 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2007,6 +2007,10 @@ static u64 numa_get_avg_runtime(struct task_struct *p, u64 *period)
if (p->last_task_numa_placement) {
delta = runtime - p->last_sum_exec_runtime;
*period = now - p->last_task_numa_placement;
+
+ /* Avoid time going backwards, prevent potential divide error: */
+ if (unlikely((s64)*period < 0))
+ *period = 0;
} else {
delta = p->se.avg.load_sum;
*period = LOAD_AVG_MAX;
@@ -4885,6 +4889,8 @@ static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer)
return HRTIMER_NORESTART;
}
+extern const u64 max_cfs_quota_period;
+
static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
{
struct cfs_bandwidth *cfs_b =
@@ -4892,6 +4898,7 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
unsigned long flags;
int overrun;
int idle = 0;
+ int count = 0;
raw_spin_lock_irqsave(&cfs_b->lock, flags);
for (;;) {
@@ -4899,6 +4906,28 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
if (!overrun)
break;
+ if (++count > 3) {
+ u64 new, old = ktime_to_ns(cfs_b->period);
+
+ new = (old * 147) / 128; /* ~115% */
+ new = min(new, max_cfs_quota_period);
+
+ cfs_b->period = ns_to_ktime(new);
+
+ /* since max is 1s, this is limited to 1e9^2, which fits in u64 */
+ cfs_b->quota *= new;
+ cfs_b->quota = div64_u64(cfs_b->quota, old);
+
+ pr_warn_ratelimited(
+ "cfs_period_timer[cpu%d]: period too short, scaling up (new cfs_period_us %lld, cfs_quota_us = %lld)\n",
+ smp_processor_id(),
+ div_u64(new, NSEC_PER_USEC),
+ div_u64(cfs_b->quota, NSEC_PER_USEC));
+
+ /* reset count so we don't come right back in here */
+ count = 0;
+ }
+
idle = do_sched_cfs_period_timer(cfs_b, overrun, flags);
}
if (idle)
@@ -7784,10 +7813,10 @@ static void update_cfs_rq_h_load(struct cfs_rq *cfs_rq)
if (cfs_rq->last_h_load_update == now)
return;
- cfs_rq->h_load_next = NULL;
+ WRITE_ONCE(cfs_rq->h_load_next, NULL);
for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);
- cfs_rq->h_load_next = se;
+ WRITE_ONCE(cfs_rq->h_load_next, se);
if (cfs_rq->last_h_load_update == now)
break;
}
@@ -7797,7 +7826,7 @@ static void update_cfs_rq_h_load(struct cfs_rq *cfs_rq)
cfs_rq->last_h_load_update = now;
}
- while ((se = cfs_rq->h_load_next) != NULL) {
+ while ((se = READ_ONCE(cfs_rq->h_load_next)) != NULL) {
load = cfs_rq->h_load;
load = div64_ul(load * se->avg.load_avg,
cfs_rq_load_avg(cfs_rq) + 1);
@@ -8060,6 +8089,18 @@ check_cpu_capacity(struct rq *rq, struct sched_domain *sd)
}
/*
+ * Check whether a rq has a misfit task and if it looks like we can actually
+ * help that task: we can migrate the task to a CPU of higher capacity, or
+ * the task's current CPU is heavily pressured.
+ */
+static inline int check_misfit_status(struct rq *rq, struct sched_domain *sd)
+{
+ return rq->misfit_task_load &&
+ (rq->cpu_capacity_orig < rq->rd->max_cpu_capacity ||
+ check_cpu_capacity(rq, sd));
+}
+
+/*
* Group imbalance indicates (and tries to solve) the problem where balancing
* groups is inadequate due to ->cpus_allowed constraints.
*
@@ -9586,35 +9627,21 @@ static void nohz_balancer_kick(struct rq *rq)
if (time_before(now, nohz.next_balance))
goto out;
- if (rq->nr_running >= 2 || rq->misfit_task_load) {
+ if (rq->nr_running >= 2) {
flags = NOHZ_KICK_MASK;
goto out;
}
rcu_read_lock();
- sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
- if (sds) {
- /*
- * If there is an imbalance between LLC domains (IOW we could
- * increase the overall cache use), we need some less-loaded LLC
- * domain to pull some load. Likewise, we may need to spread
- * load within the current LLC domain (e.g. packed SMT cores but
- * other CPUs are idle). We can't really know from here how busy
- * the others are - so just get a nohz balance going if it looks
- * like this LLC domain has tasks we could move.
- */
- nr_busy = atomic_read(&sds->nr_busy_cpus);
- if (nr_busy > 1) {
- flags = NOHZ_KICK_MASK;
- goto unlock;
- }
-
- }
sd = rcu_dereference(rq->sd);
if (sd) {
- if ((rq->cfs.h_nr_running >= 1) &&
- check_cpu_capacity(rq, sd)) {
+ /*
+ * If there's a CFS task and the current CPU has reduced
+ * capacity; kick the ILB to see if there's a better CPU to run
+ * on.
+ */
+ if (rq->cfs.h_nr_running >= 1 && check_cpu_capacity(rq, sd)) {
flags = NOHZ_KICK_MASK;
goto unlock;
}
@@ -9622,6 +9649,11 @@ static void nohz_balancer_kick(struct rq *rq)
sd = rcu_dereference(per_cpu(sd_asym_packing, cpu));
if (sd) {
+ /*
+ * When ASYM_PACKING; see if there's a more preferred CPU
+ * currently idle; in which case, kick the ILB to move tasks
+ * around.
+ */
for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) {
if (sched_asym_prefer(i, cpu)) {
flags = NOHZ_KICK_MASK;
@@ -9629,6 +9661,45 @@ static void nohz_balancer_kick(struct rq *rq)
}
}
}
+
+ sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, cpu));
+ if (sd) {
+ /*
+ * When ASYM_CPUCAPACITY; see if there's a higher capacity CPU
+ * to run the misfit task on.
+ */
+ if (check_misfit_status(rq, sd)) {
+ flags = NOHZ_KICK_MASK;
+ goto unlock;
+ }
+
+ /*
+ * For asymmetric systems, we do not want to nicely balance
+ * cache use, instead we want to embrace asymmetry and only
+ * ensure tasks have enough CPU capacity.
+ *
+ * Skip the LLC logic because it's not relevant in that case.
+ */
+ goto unlock;
+ }
+
+ sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
+ if (sds) {
+ /*
+ * If there is an imbalance between LLC domains (IOW we could
+ * increase the overall cache use), we need some less-loaded LLC
+ * domain to pull some load. Likewise, we may need to spread
+ * load within the current LLC domain (e.g. packed SMT cores but
+ * other CPUs are idle). We can't really know from here how busy
+ * the others are - so just get a nohz balance going if it looks
+ * like this LLC domain has tasks we could move.
+ */
+ nr_busy = atomic_read(&sds->nr_busy_cpus);
+ if (nr_busy > 1) {
+ flags = NOHZ_KICK_MASK;
+ goto unlock;
+ }
+ }
unlock:
rcu_read_unlock();
out:
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 54a0347ca812..3582eeb59893 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -149,7 +149,7 @@ static void populate_seccomp_data(struct seccomp_data *sd)
sd->nr = syscall_get_nr(task, regs);
sd->arch = syscall_get_arch();
- syscall_get_arguments(task, regs, 0, 6, args);
+ syscall_get_arguments(task, regs, args);
sd->args[0] = args[0];
sd->args[1] = args[1];
sd->args[2] = args[2];
@@ -502,7 +502,10 @@ out:
*
* Caller must be holding current->sighand->siglock lock.
*
- * Returns 0 on success, -ve on error.
+ * Returns 0 on success, -ve on error, or
+ * - in TSYNC mode: the pid of a thread which was either not in the correct
+ * seccomp mode or did not have an ancestral seccomp filter
+ * - in NEW_LISTENER mode: the fd of the new listener
*/
static long seccomp_attach_filter(unsigned int flags,
struct seccomp_filter *filter)
@@ -1258,6 +1261,16 @@ static long seccomp_set_mode_filter(unsigned int flags,
if (flags & ~SECCOMP_FILTER_FLAG_MASK)
return -EINVAL;
+ /*
+ * In the successful case, NEW_LISTENER returns the new listener fd.
+ * But in the failure case, TSYNC returns the thread that died. If you
+ * combine these two flags, there's no way to tell whether something
+ * succeeded or failed. So, let's disallow this combination.
+ */
+ if ((flags & SECCOMP_FILTER_FLAG_TSYNC) &&
+ (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER))
+ return -EINVAL;
+
/* Prepare the new filter before holding any locks. */
prepared = seccomp_prepare_user_filter(filter);
if (IS_ERR(prepared))
@@ -1304,7 +1317,7 @@ out:
mutex_unlock(&current->signal->cred_guard_mutex);
out_put_fd:
if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) {
- if (ret < 0) {
+ if (ret) {
listener_f->private_data = NULL;
fput(listener_f);
put_unused_fd(listener);
diff --git a/kernel/signal.c b/kernel/signal.c
index b7953934aa99..227ba170298e 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3581,7 +3581,7 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
if (flags)
return -EINVAL;
- f = fdget_raw(pidfd);
+ f = fdget(pidfd);
if (!f.file)
return -EBADF;
@@ -3605,16 +3605,11 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
if (unlikely(sig != kinfo.si_signo))
goto err;
+ /* Only allow sending arbitrary signals to yourself. */
+ ret = -EPERM;
if ((task_pid(current) != pid) &&
- (kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL)) {
- /* Only allow sending arbitrary signals to yourself. */
- ret = -EPERM;
- if (kinfo.si_code != SI_USER)
- goto err;
-
- /* Turn this into a regular kill signal. */
- prepare_kill_siginfo(sig, &kinfo);
- }
+ (kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL))
+ goto err;
} else {
prepare_kill_siginfo(sig, &kinfo);
}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index e5da394d1ca3..c9ec050bcf46 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -128,6 +128,7 @@ static int zero;
static int __maybe_unused one = 1;
static int __maybe_unused two = 2;
static int __maybe_unused four = 4;
+static unsigned long zero_ul;
static unsigned long one_ul = 1;
static unsigned long long_max = LONG_MAX;
static int one_hundred = 100;
@@ -1750,7 +1751,7 @@ static struct ctl_table fs_table[] = {
.maxlen = sizeof(files_stat.max_files),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
- .extra1 = &zero,
+ .extra1 = &zero_ul,
.extra2 = &long_max,
},
{
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 2c97e8c2d29f..0519a8805aab 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -594,7 +594,7 @@ static ktime_t alarm_timer_remaining(struct k_itimer *timr, ktime_t now)
{
struct alarm *alarm = &timr->it.alarm.alarmtimer;
- return ktime_sub(now, alarm->node.expires);
+ return ktime_sub(alarm->node.expires, now);
}
/**
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index dc1b6f1929f9..ac9c03dd6c7d 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -89,7 +89,7 @@ struct clocksource * __init __weak clocksource_default_clock(void)
return &clocksource_jiffies;
}
-struct clocksource refined_jiffies;
+static struct clocksource refined_jiffies;
int register_refined_jiffies(long cycles_per_second)
{
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 094b82ca95e5..930113b9799a 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -272,7 +272,7 @@ static u64 notrace suspended_sched_clock_read(void)
return cd.read_data[seq & 1].epoch_cyc;
}
-static int sched_clock_suspend(void)
+int sched_clock_suspend(void)
{
struct clock_read_data *rd = &cd.read_data[0];
@@ -283,7 +283,7 @@ static int sched_clock_suspend(void)
return 0;
}
-static void sched_clock_resume(void)
+void sched_clock_resume(void)
{
struct clock_read_data *rd = &cd.read_data[0];
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 529143b4c8d2..df401463a191 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -487,6 +487,7 @@ void tick_freeze(void)
trace_suspend_resume(TPS("timekeeping_freeze"),
smp_processor_id(), true);
system_state = SYSTEM_SUSPEND;
+ sched_clock_suspend();
timekeeping_suspend();
} else {
tick_suspend_local();
@@ -510,6 +511,7 @@ void tick_unfreeze(void)
if (tick_freeze_depth == num_online_cpus()) {
timekeeping_resume();
+ sched_clock_resume();
system_state = SYSTEM_RUNNING;
trace_suspend_resume(TPS("timekeeping_freeze"),
smp_processor_id(), false);
diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h
index 7a9b4eb7a1d5..141ab3ab0354 100644
--- a/kernel/time/timekeeping.h
+++ b/kernel/time/timekeeping.h
@@ -14,6 +14,13 @@ extern u64 timekeeping_max_deferment(void);
extern void timekeeping_warp_clock(void);
extern int timekeeping_suspend(void);
extern void timekeeping_resume(void);
+#ifdef CONFIG_GENERIC_SCHED_CLOCK
+extern int sched_clock_suspend(void);
+extern void sched_clock_resume(void);
+#else
+static inline int sched_clock_suspend(void) { return 0; }
+static inline void sched_clock_resume(void) { }
+#endif
extern void do_timer(unsigned long ticks);
extern void update_wall_time(void);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index fa79323331b2..b920358dd8f7 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -33,6 +33,7 @@
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/rcupdate.h>
+#include <linux/kprobes.h>
#include <trace/events/sched.h>
@@ -1992,7 +1993,7 @@ static void print_bug_type(void)
* modifying the code. @failed should be one of either:
* EFAULT - if the problem happens on reading the @ip address
* EINVAL - if what is read at @ip is not what was expected
- * EPERM - if the problem happens on writting to the @ip address
+ * EPERM - if the problem happens on writing to the @ip address
*/
void ftrace_bug(int failed, struct dyn_ftrace *rec)
{
@@ -2391,7 +2392,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
return ftrace_modify_call(rec, ftrace_old_addr, ftrace_addr);
}
- return -1; /* unknow ftrace bug */
+ return -1; /* unknown ftrace bug */
}
void __weak ftrace_replace_code(int mod_flags)
@@ -3004,7 +3005,7 @@ ftrace_allocate_pages(unsigned long num_to_init)
int cnt;
if (!num_to_init)
- return 0;
+ return NULL;
start_pg = pg = kzalloc(sizeof(*pg), GFP_KERNEL);
if (!pg)
@@ -4755,7 +4756,7 @@ static int
ftrace_set_addr(struct ftrace_ops *ops, unsigned long ip, int remove,
int reset, int enable)
{
- return ftrace_set_hash(ops, 0, 0, ip, remove, reset, enable);
+ return ftrace_set_hash(ops, NULL, 0, ip, remove, reset, enable);
}
/**
@@ -5463,7 +5464,7 @@ void ftrace_create_filter_files(struct ftrace_ops *ops,
/*
* The name "destroy_filter_files" is really a misnomer. Although
- * in the future, it may actualy delete the files, but this is
+ * in the future, it may actually delete the files, but this is
* really intended to make sure the ops passed in are disabled
* and that when this function returns, the caller is free to
* free the ops.
@@ -5786,7 +5787,7 @@ void ftrace_module_enable(struct module *mod)
/*
* If the tracing is enabled, go ahead and enable the record.
*
- * The reason not to enable the record immediatelly is the
+ * The reason not to enable the record immediately is the
* inherent check of ftrace_make_nop/ftrace_make_call for
* correct previous instructions. Making first the NOP
* conversion puts the module to the correct state, thus
@@ -6246,7 +6247,7 @@ void ftrace_reset_array_ops(struct trace_array *tr)
tr->ops->func = ftrace_stub;
}
-static inline void
+static nokprobe_inline void
__ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *ignored, struct pt_regs *regs)
{
@@ -6306,11 +6307,13 @@ static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
{
__ftrace_ops_list_func(ip, parent_ip, NULL, regs);
}
+NOKPROBE_SYMBOL(ftrace_ops_list_func);
#else
static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip)
{
__ftrace_ops_list_func(ip, parent_ip, NULL, NULL);
}
+NOKPROBE_SYMBOL(ftrace_ops_no_ops);
#endif
/*
@@ -6337,6 +6340,7 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip,
preempt_enable_notrace();
trace_clear_recursion(bit);
}
+NOKPROBE_SYMBOL(ftrace_ops_assist_func);
/**
* ftrace_ops_get_func - get the function a trampoline should call
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 41b6f96e5366..4ee8d8aa3d0f 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -762,7 +762,7 @@ u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu)
preempt_disable_notrace();
time = rb_time_stamp(buffer);
- preempt_enable_no_resched_notrace();
+ preempt_enable_notrace();
return time;
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 21153e64bf1c..ca1ee656d6d8 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -496,8 +496,10 @@ int trace_pid_write(struct trace_pid_list *filtered_pids,
* not modified.
*/
pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
- if (!pid_list)
+ if (!pid_list) {
+ trace_parser_put(&parser);
return -ENOMEM;
+ }
pid_list->pid_max = READ_ONCE(pid_max);
@@ -507,6 +509,7 @@ int trace_pid_write(struct trace_pid_list *filtered_pids,
pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
if (!pid_list->pids) {
+ trace_parser_put(&parser);
kfree(pid_list);
return -ENOMEM;
}
@@ -7025,35 +7028,43 @@ struct buffer_ref {
struct ring_buffer *buffer;
void *page;
int cpu;
- int ref;
+ refcount_t refcount;
};
+static void buffer_ref_release(struct buffer_ref *ref)
+{
+ if (!refcount_dec_and_test(&ref->refcount))
+ return;
+ ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
+ kfree(ref);
+}
+
static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
struct buffer_ref *ref = (struct buffer_ref *)buf->private;
- if (--ref->ref)
- return;
-
- ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
- kfree(ref);
+ buffer_ref_release(ref);
buf->private = 0;
}
-static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
+static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
struct buffer_ref *ref = (struct buffer_ref *)buf->private;
- ref->ref++;
+ if (refcount_read(&ref->refcount) > INT_MAX/2)
+ return false;
+
+ refcount_inc(&ref->refcount);
+ return true;
}
/* Pipe buffer operations for a buffer. */
static const struct pipe_buf_operations buffer_pipe_buf_ops = {
.confirm = generic_pipe_buf_confirm,
.release = buffer_pipe_buf_release,
- .steal = generic_pipe_buf_steal,
+ .steal = generic_pipe_buf_nosteal,
.get = buffer_pipe_buf_get,
};
@@ -7066,11 +7077,7 @@ static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
struct buffer_ref *ref =
(struct buffer_ref *)spd->partial[i].private;
- if (--ref->ref)
- return;
-
- ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
- kfree(ref);
+ buffer_ref_release(ref);
spd->partial[i].private = 0;
}
@@ -7125,7 +7132,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
break;
}
- ref->ref = 1;
+ refcount_set(&ref->refcount, 1);
ref->buffer = iter->trace_buffer->buffer;
ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
if (IS_ERR(ref->page)) {
diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c
index dd1f43588d70..fa100ed3b4de 100644
--- a/kernel/trace/trace_dynevent.c
+++ b/kernel/trace/trace_dynevent.c
@@ -74,7 +74,7 @@ int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type)
static int create_dyn_event(int argc, char **argv)
{
struct dyn_event_operations *ops;
- int ret;
+ int ret = -ENODEV;
if (argv[0][0] == '-' || argv[0][0] == '!')
return dyn_event_release(argc, argv, NULL);
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index ca46339f3009..795aa2038377 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -3713,7 +3713,6 @@ static void track_data_destroy(struct hist_trigger_data *hist_data,
struct trace_event_file *file = hist_data->event_file;
destroy_hist_field(data->track_data.track_var, 0);
- destroy_hist_field(data->track_data.var_ref, 0);
if (data->action == ACTION_SNAPSHOT) {
struct track_data *track_data;
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index f93a56d2db27..fa8fbff736d6 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -314,6 +314,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
struct ring_buffer_event *event;
struct ring_buffer *buffer;
unsigned long irq_flags;
+ unsigned long args[6];
int pc;
int syscall_nr;
int size;
@@ -347,7 +348,8 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
entry = ring_buffer_event_data(event);
entry->nr = syscall_nr;
- syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
+ syscall_get_arguments(current, regs, args);
+ memcpy(entry->args, args, sizeof(unsigned long) * sys_data->nb_args);
event_trigger_unlock_commit(trace_file, buffer, event, entry,
irq_flags, pc);
@@ -583,6 +585,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
struct syscall_metadata *sys_data;
struct syscall_trace_enter *rec;
struct hlist_head *head;
+ unsigned long args[6];
bool valid_prog_array;
int syscall_nr;
int rctx;
@@ -613,8 +616,8 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
return;
rec->nr = syscall_nr;
- syscall_get_arguments(current, regs, 0, sys_data->nb_args,
- (unsigned long *)&rec->args);
+ syscall_get_arguments(current, regs, args);
+ memcpy(&rec->args, args, sizeof(unsigned long) * sys_data->nb_args);
if ((valid_prog_array &&
!perf_call_bpf_enter(sys_data->enter_event, regs, sys_data, rec)) ||
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 8fbfda94a67b..6a5787233113 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -42,9 +42,9 @@ int __read_mostly watchdog_user_enabled = 1;
int __read_mostly nmi_watchdog_user_enabled = NMI_WATCHDOG_DEFAULT;
int __read_mostly soft_watchdog_user_enabled = 1;
int __read_mostly watchdog_thresh = 10;
-int __read_mostly nmi_watchdog_available;
+static int __read_mostly nmi_watchdog_available;
-struct cpumask watchdog_allowed_mask __read_mostly;
+static struct cpumask watchdog_allowed_mask __read_mostly;
struct cpumask watchdog_cpumask __read_mostly;
unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
@@ -554,13 +554,15 @@ static void softlockup_start_all(void)
int lockup_detector_online_cpu(unsigned int cpu)
{
- watchdog_enable(cpu);
+ if (cpumask_test_cpu(cpu, &watchdog_allowed_mask))
+ watchdog_enable(cpu);
return 0;
}
int lockup_detector_offline_cpu(unsigned int cpu)
{
- watchdog_disable(cpu);
+ if (cpumask_test_cpu(cpu, &watchdog_allowed_mask))
+ watchdog_disable(cpu);
return 0;
}
diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
index 71381168dede..247bf0b1582c 100644
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c
@@ -135,7 +135,8 @@ static void watchdog_overflow_callback(struct perf_event *event,
if (__this_cpu_read(hard_watchdog_warn) == true)
return;
- pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
+ pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n",
+ this_cpu);
print_modules();
print_irqtrace_events(current);
if (regs)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 4026d1871407..ddee541ea97a 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -4266,7 +4266,7 @@ struct workqueue_struct *alloc_workqueue(const char *fmt,
INIT_LIST_HEAD(&wq->list);
if (alloc_and_link_pwqs(wq) < 0)
- goto err_free_wq;
+ goto err_unreg_lockdep;
if (wq_online && init_rescuer(wq) < 0)
goto err_destroy;
@@ -4292,9 +4292,10 @@ struct workqueue_struct *alloc_workqueue(const char *fmt,
return wq;
-err_free_wq:
+err_unreg_lockdep:
wq_unregister_lockdep(wq);
wq_free_lockdep(wq);
+err_free_wq:
free_workqueue_attrs(wq->unbound_attrs);
kfree(wq);
return NULL;