aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cgroup.c6
-rw-r--r--kernel/events/core.c47
-rw-r--r--kernel/events/ring_buffer.c15
-rw-r--r--kernel/memremap.c9
-rw-r--r--kernel/power/qos.c11
-rw-r--r--kernel/sched/core.c22
6 files changed, 95 insertions, 15 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index d1c51b7f5221..5e8dab5bf9ad 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -6270,6 +6270,12 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
if (cgroup_sk_alloc_disabled)
return;
+ /* Socket clone path */
+ if (skcd->val) {
+ cgroup_get(sock_cgroup_ptr(skcd));
+ return;
+ }
+
rcu_read_lock();
while (true) {
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3cfabdf7b942..a54f2c2cdb20 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2496,11 +2496,11 @@ static int __perf_event_stop(void *info)
return 0;
}
-static int perf_event_restart(struct perf_event *event)
+static int perf_event_stop(struct perf_event *event, int restart)
{
struct stop_event_data sd = {
.event = event,
- .restart = 1,
+ .restart = restart,
};
int ret = 0;
@@ -3549,10 +3549,18 @@ static int perf_event_read(struct perf_event *event, bool group)
.group = group,
.ret = 0,
};
- ret = smp_call_function_single(event->oncpu, __perf_event_read, &data, 1);
- /* The event must have been read from an online CPU: */
- WARN_ON_ONCE(ret);
- ret = ret ? : data.ret;
+ /*
+ * Purposely ignore the smp_call_function_single() return
+ * value.
+ *
+ * If event->oncpu isn't a valid CPU it means the event got
+ * scheduled out and that will have updated the event count.
+ *
+ * Therefore, either way, we'll have an up-to-date event count
+ * after this.
+ */
+ (void)smp_call_function_single(event->oncpu, __perf_event_read, &data, 1);
+ ret = data.ret;
} else if (event->state == PERF_EVENT_STATE_INACTIVE) {
struct perf_event_context *ctx = event->ctx;
unsigned long flags;
@@ -4837,6 +4845,19 @@ static void ring_buffer_attach(struct perf_event *event,
spin_unlock_irqrestore(&rb->event_lock, flags);
}
+ /*
+ * Avoid racing with perf_mmap_close(AUX): stop the event
+ * before swizzling the event::rb pointer; if it's getting
+ * unmapped, its aux_mmap_count will be 0 and it won't
+ * restart. See the comment in __perf_pmu_output_stop().
+ *
+ * Data will inevitably be lost when set_output is done in
+ * mid-air, but then again, whoever does it like this is
+ * not in for the data anyway.
+ */
+ if (has_aux(event))
+ perf_event_stop(event, 0);
+
rcu_assign_pointer(event->rb, rb);
if (old_rb) {
@@ -6112,7 +6133,7 @@ static void perf_event_addr_filters_exec(struct perf_event *event, void *data)
raw_spin_unlock_irqrestore(&ifh->lock, flags);
if (restart)
- perf_event_restart(event);
+ perf_event_stop(event, 1);
}
void perf_event_exec(void)
@@ -6156,7 +6177,13 @@ static void __perf_event_output_stop(struct perf_event *event, void *data)
/*
* In case of inheritance, it will be the parent that links to the
- * ring-buffer, but it will be the child that's actually using it:
+ * ring-buffer, but it will be the child that's actually using it.
+ *
+ * We are using event::rb to determine if the event should be stopped,
+ * however this may race with ring_buffer_attach() (through set_output),
+ * which will make us skip the event that actually needs to be stopped.
+ * So ring_buffer_attach() has to stop an aux event before re-assigning
+ * its rb pointer.
*/
if (rcu_dereference(parent->rb) == rb)
ro->err = __perf_event_stop(&sd);
@@ -6670,7 +6697,7 @@ static void __perf_addr_filters_adjust(struct perf_event *event, void *data)
raw_spin_unlock_irqrestore(&ifh->lock, flags);
if (restart)
- perf_event_restart(event);
+ perf_event_stop(event, 1);
}
/*
@@ -7859,7 +7886,7 @@ static void perf_event_addr_filters_apply(struct perf_event *event)
mmput(mm);
restart:
- perf_event_restart(event);
+ perf_event_stop(event, 1);
}
/*
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index ae9b90dc9a5a..257fa460b846 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -330,15 +330,22 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
if (!rb)
return NULL;
- if (!rb_has_aux(rb) || !atomic_inc_not_zero(&rb->aux_refcount))
+ if (!rb_has_aux(rb))
goto err;
/*
- * If rb::aux_mmap_count is zero (and rb_has_aux() above went through),
- * the aux buffer is in perf_mmap_close(), about to get freed.
+ * If aux_mmap_count is zero, the aux buffer is in perf_mmap_close(),
+ * about to get freed, so we leave immediately.
+ *
+ * Checking rb::aux_mmap_count and rb::refcount has to be done in
+ * the same order, see perf_mmap_close. Otherwise we end up freeing
+ * aux pages in this path, which is a bug, because in_atomic().
*/
if (!atomic_read(&rb->aux_mmap_count))
- goto err_put;
+ goto err;
+
+ if (!atomic_inc_not_zero(&rb->aux_refcount))
+ goto err;
/*
* Nesting is not supported for AUX area, make sure nested
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 251d16b4cb41..b501e390bb34 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -247,6 +247,7 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
align_start = res->start & ~(SECTION_SIZE - 1);
align_size = ALIGN(resource_size(res), SECTION_SIZE);
arch_remove_memory(align_start, align_size);
+ untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
pgmap_radix_release(res);
dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc,
"%s: failed to free all reserved pages\n", __func__);
@@ -282,6 +283,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
struct percpu_ref *ref, struct vmem_altmap *altmap)
{
resource_size_t key, align_start, align_size, align_end;
+ pgprot_t pgprot = PAGE_KERNEL;
struct dev_pagemap *pgmap;
struct page_map *page_map;
int error, nid, is_ram;
@@ -351,6 +353,11 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
if (nid < 0)
nid = numa_mem_id();
+ error = track_pfn_remap(NULL, &pgprot, PHYS_PFN(align_start), 0,
+ align_size);
+ if (error)
+ goto err_pfn_remap;
+
error = arch_add_memory(nid, align_start, align_size, true);
if (error)
goto err_add_memory;
@@ -371,6 +378,8 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
return __va(res->start);
err_add_memory:
+ untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
+ err_pfn_remap:
err_radix:
pgmap_radix_release(res);
devres_free(page_map);
diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index 97b0df71303e..168ff442ebde 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -482,7 +482,16 @@ void pm_qos_update_request(struct pm_qos_request *req,
return;
}
- cancel_delayed_work_sync(&req->work);
+ /*
+ * This function may be called very early during boot, for example,
+ * from of_clk_init(), where irq needs to stay disabled.
+ * cancel_delayed_work_sync() assumes that irq is enabled on
+ * invocation and re-enables it on return. Avoid calling it until
+ * workqueue is initialized.
+ */
+ if (keventd_up())
+ cancel_delayed_work_sync(&req->work);
+
__pm_qos_update_request(req, new_value);
}
EXPORT_SYMBOL_GPL(pm_qos_update_request);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2a906f20fba7..44817c640e99 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2016,6 +2016,28 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
success = 1; /* we're going to change ->state */
cpu = task_cpu(p);
+ /*
+ * Ensure we load p->on_rq _after_ p->state, otherwise it would
+ * be possible to, falsely, observe p->on_rq == 0 and get stuck
+ * in smp_cond_load_acquire() below.
+ *
+ * sched_ttwu_pending() try_to_wake_up()
+ * [S] p->on_rq = 1; [L] P->state
+ * UNLOCK rq->lock -----.
+ * \
+ * +--- RMB
+ * schedule() /
+ * LOCK rq->lock -----'
+ * UNLOCK rq->lock
+ *
+ * [task p]
+ * [S] p->state = UNINTERRUPTIBLE [L] p->on_rq
+ *
+ * Pairs with the UNLOCK+LOCK on rq->lock from the
+ * last wakeup of our task and the schedule that got our task
+ * current.
+ */
+ smp_rmb();
if (p->on_rq && ttwu_remote(p, wake_flags))
goto stat;