Diffstat (limited to 'kernel/rcu/tree.c')
-rw-r--r--	kernel/rcu/tree.c	194
1 file changed, 188 insertions, 6 deletions
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 1694a6b57ad8..0af016fdbf19 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2683,19 +2683,187 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func)
}
EXPORT_SYMBOL_GPL(call_rcu);
+
+/* Maximum number of jiffies to wait before draining a batch (HZ/50 == 20 ms). */
+#define KFREE_DRAIN_JIFFIES (HZ / 50)
+
+/**
+ * struct kfree_rcu_cpu - batch up kfree_rcu() requests for RCU grace period
+ * @rcu_work: Let queue_rcu_work() invoke workqueue handler after grace period
+ * @head: List of kfree_rcu() objects not yet waiting for a grace period
+ * @head_free: List of kfree_rcu() objects already waiting for a grace period
+ * @lock: Synchronize access to this structure
+ * @monitor_work: Promote @head to @head_free after KFREE_DRAIN_JIFFIES
+ * @monitor_todo: Tracks whether a @monitor_work delayed work is pending
+ * @initialized: The @lock and @rcu_work fields have been initialized
+ *
+ * This is a per-CPU structure. The reason that it is not included in
+ * the rcu_data structure is to permit this code to be extracted from
+ * the RCU files. Such extraction could allow further optimization of
+ * the interactions with the slab allocators.
+ */
+struct kfree_rcu_cpu {
+ struct rcu_work rcu_work;
+ struct rcu_head *head;
+ struct rcu_head *head_free;
+ spinlock_t lock;
+ struct delayed_work monitor_work;
+ int monitor_todo;
+ bool initialized;
+};
+
+static DEFINE_PER_CPU(struct kfree_rcu_cpu, krc);
+
/*
- * Queue an RCU callback for lazy invocation after a grace period.
- * This will likely be later named something like "call_rcu_lazy()",
- * but this change will require some way of tagging the lazy RCU
- * callbacks in the list of pending callbacks. Until then, this
- * function may only be called from __kfree_rcu().
+ * This function is invoked in workqueue context after a grace period.
+ * It frees all the objects queued on ->head_free.
*/
-void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
+static void kfree_rcu_work(struct work_struct *work)
+{
+ unsigned long flags;
+ struct rcu_head *head, *next;
+ struct kfree_rcu_cpu *krcp;
+
+ krcp = container_of(to_rcu_work(work), struct kfree_rcu_cpu, rcu_work);
+ spin_lock_irqsave(&krcp->lock, flags);
+ head = krcp->head_free;
+ krcp->head_free = NULL;
+ spin_unlock_irqrestore(&krcp->lock, flags);
+
+ // List "head" is now private, so traverse locklessly.
+ for (; head; head = next) {
+ next = head->next;
+ // Potentially optimize with kfree_bulk in future.
+ __rcu_reclaim(rcu_state.name, head);
+ cond_resched_tasks_rcu_qs();
+ }
+}
+
+/*
+ * Schedule the kfree batch RCU work to run in workqueue context after a GP.
+ *
+ * This function is invoked by kfree_rcu_monitor() when the KFREE_DRAIN_JIFFIES
+ * timeout has been reached.
+ */
+static inline bool queue_kfree_rcu_work(struct kfree_rcu_cpu *krcp)
+{
+ lockdep_assert_held(&krcp->lock);
+
+ // If a previous RCU batch is in progress, we cannot immediately
+ // queue another one, so return false to tell caller to retry.
+ if (krcp->head_free)
+ return false;
+
+ krcp->head_free = krcp->head;
+ krcp->head = NULL;
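+	// queue_rcu_work() invokes kfree_rcu_work() in workqueue context
+	// only after a full grace period has elapsed, so the objects now on
+	// the detached ->head_free list are safe to free at that point.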
+ INIT_RCU_WORK(&krcp->rcu_work, kfree_rcu_work);
+ queue_rcu_work(system_wq, &krcp->rcu_work);
+ return true;
+}
+
+static inline void kfree_rcu_drain_unlock(struct kfree_rcu_cpu *krcp,
+ unsigned long flags)
+{
+ // Attempt to start a new batch.
+ if (queue_kfree_rcu_work(krcp)) {
+ // Success! Our job is done here.
+ spin_unlock_irqrestore(&krcp->lock, flags);
+ return;
+ }
+
+ // Previous RCU batch still in progress, try again later.
+ if (!xchg(&krcp->monitor_todo, true))
+ schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES);
+ spin_unlock_irqrestore(&krcp->lock, flags);
+}
+
+/*
+ * This function is invoked after the KFREE_DRAIN_JIFFIES timeout.
+ * It invokes kfree_rcu_drain_unlock() to attempt to start another batch.
+ */
+static void kfree_rcu_monitor(struct work_struct *work)
+{
+ unsigned long flags;
+ struct kfree_rcu_cpu *krcp = container_of(work, struct kfree_rcu_cpu,
+ monitor_work.work);
+
+ spin_lock_irqsave(&krcp->lock, flags);
+ if (xchg(&krcp->monitor_todo, false))
+ kfree_rcu_drain_unlock(krcp, flags);
+ else
+ spin_unlock_irqrestore(&krcp->lock, flags);
+}
+
+/*
+ * This version of kfree_call_rcu() does not do batching of kfree_rcu() requests.
+ * Used only by the rcuperf torture test for comparison against the batched
+ * kfree_call_rcu() implementation.
+ */
+void kfree_call_rcu_nobatch(struct rcu_head *head, rcu_callback_t func)
{
__call_rcu(head, func, 1);
}
+EXPORT_SYMBOL_GPL(kfree_call_rcu_nobatch);
+
+/*
+ * Queue a request for lazy invocation of kfree() after a grace period.
+ *
+ * Each kfree_call_rcu() request is added to a batch. The batch will be drained
+ * every KFREE_DRAIN_JIFFIES jiffies. All the objects in the batch
+ * will be kfree'd in workqueue context. This allows us to:
+ *
+ * 1. Batch requests together to reduce the number of grace periods during
+ *    heavy kfree_rcu() load.
+ *
+ * 2. Use kfree_bulk() on a large number of kfree_rcu() requests, thus
+ *    reducing cache misses and the per-object overhead of kfree().
+ */
+void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
+{
+ unsigned long flags;
+ struct kfree_rcu_cpu *krcp;
+
+ local_irq_save(flags); // For safely calling this_cpu_ptr().
+ krcp = this_cpu_ptr(&krc);
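+	// krcp->lock is usable only once kfree_rcu_batch_init() has run;
+	// before that (very early boot) objects are queued with only
+	// interrupts disabled for protection.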
+ if (krcp->initialized)
+ spin_lock(&krcp->lock);
+
+ // Queue the object but don't yet schedule the batch.
+ head->func = func;
+ head->next = krcp->head;
+ krcp->head = head;
+
+	// Schedule the monitor to drain after KFREE_DRAIN_JIFFIES, but only
+	// once the scheduler is fully running; until then,
+	// kfree_rcu_scheduler_running() takes care of it.
+ if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
+ !xchg(&krcp->monitor_todo, true))
+ schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES);
+
+ if (krcp->initialized)
+ spin_unlock(&krcp->lock);
+ local_irq_restore(flags);
+}
EXPORT_SYMBOL_GPL(kfree_call_rcu);
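+
+/*
+ * Invoked once the scheduler is up and running, to schedule draining of any
+ * kfree_rcu() objects that were queued during early boot, when
+ * kfree_call_rcu() could not yet schedule the monitor work.
+ */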
+void __init kfree_rcu_scheduler_running(void)
+{
+	int cpu;
+	unsigned long flags;
+
+	for_each_online_cpu(cpu) {
+		struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
+
+		spin_lock_irqsave(&krcp->lock, flags);
+		if (!krcp->head || xchg(&krcp->monitor_todo, true)) {
+			spin_unlock_irqrestore(&krcp->lock, flags);
+			continue;
+		}
+		schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES);
+		spin_unlock_irqrestore(&krcp->lock, flags);
+	}
+}
+
/*
* During early boot, any blocking grace-period wait automatically
* implies a grace period. Later on, this is never the case for PREEMPT.
@@ -3557,12 +3725,26 @@ static void __init rcu_dump_rcu_node_tree(void)
struct workqueue_struct *rcu_gp_wq;
struct workqueue_struct *rcu_par_gp_wq;
+static void __init kfree_rcu_batch_init(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
+
+ spin_lock_init(&krcp->lock);
+ INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor);
+ krcp->initialized = true;
+ }
+}
+
void __init rcu_init(void)
{
int cpu;
rcu_early_boot_tests();
+ kfree_rcu_batch_init();
rcu_bootup_announce();
rcu_init_geometry();
rcu_init_one();