From f9419f7bd7a5318b636a941a0214c5cdfa6f6530 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer
Date: Mon, 16 Oct 2017 12:19:44 +0200
Subject: bpf: cpumap add tracepoints

This adds two tracepoints to the cpumap. One for the enqueue side,
trace_xdp_cpumap_enqueue(), and one for the kthread dequeue side,
trace_xdp_cpumap_kthread().

To mitigate the tracepoint overhead, these are invoked during the
enqueue/dequeue bulking phases, thus amortizing the cost.

The obvious use-cases are debugging and monitoring. The non-intuitive
use-case is using these as a feedback loop to gauge the system load.
One can imagine auto-scaling by reducing, adding, or activating more
worker CPUs on demand.

V4: tracepoint remove time_limit info, instead add sched info
V8: intro struct bpf_cpu_map_entry members cpu+map_id in this patch

Signed-off-by: Jesper Dangaard Brouer
Signed-off-by: David S. Miller
---
 kernel/bpf/cpumap.c | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index ee7adf4352dd..b4358d84ddf1 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -24,6 +24,7 @@
 #include <linux/workqueue.h>
 #include <linux/kthread.h>
 #include <linux/capability.h>
+#include <trace/events/xdp.h>
 
 #include <linux/netdevice.h>   /* netif_receive_skb_core */
 #include <linux/etherdevice.h> /* eth_type_trans */
@@ -43,6 +44,8 @@ struct xdp_bulk_queue {
 
 /* Struct for every remote "destination" CPU in map */
 struct bpf_cpu_map_entry {
+	u32 cpu;    /* kthread CPU and map index */
+	int map_id; /* Back reference to map */
 	u32 qsize;  /* Queue size placeholder for map lookup */
 
 	/* XDP can run multiple RX-ring queues, need __percpu enqueue store */
@@ -280,15 +283,16 @@ static int cpu_map_kthread_run(void *data)
 	 * kthread_stop signal until queue is empty.
 	 */
 	while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) {
-		unsigned int processed = 0, drops = 0;
+		unsigned int processed = 0, drops = 0, sched = 0;
 		struct xdp_pkt *xdp_pkt;
 
 		/* Release CPU reschedule checks */
 		if (__ptr_ring_empty(rcpu->queue)) {
 			__set_current_state(TASK_INTERRUPTIBLE);
 			schedule();
+			sched = 1;
 		} else {
-			cond_resched();
+			sched = cond_resched();
 		}
 		__set_current_state(TASK_RUNNING);
 
@@ -318,6 +322,9 @@ static int cpu_map_kthread_run(void *data)
 			if (++processed == 8)
 				break;
 		}
+		/* Feedback loop via tracepoint */
+		trace_xdp_cpumap_kthread(rcpu->map_id, processed, drops, sched);
+
 		local_bh_enable(); /* resched point, may call do_softirq() */
 	}
 	__set_current_state(TASK_RUNNING);
@@ -354,7 +361,9 @@ struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu, int map_id)
 	if (err)
 		goto free_queue;
 
-	rcpu->qsize = qsize;
+	rcpu->cpu = cpu;
+	rcpu->map_id = map_id;
+	rcpu->qsize = qsize;
 
 	/* Setup kthread */
 	rcpu->kthread = kthread_create_on_node(cpu_map_kthread_run, rcpu, numa,
@@ -584,6 +593,8 @@ const struct bpf_map_ops cpu_map_ops = {
 static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
 			     struct xdp_bulk_queue *bq)
 {
+	unsigned int processed = 0, drops = 0;
+	const int to_cpu = rcpu->cpu;
 	struct ptr_ring *q;
 	int i;
 
@@ -599,13 +610,16 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
 
 		err = __ptr_ring_produce(q, xdp_pkt);
 		if (err) {
-			/* Free xdp_pkt */
-			page_frag_free(xdp_pkt);
+			drops++;
+			page_frag_free(xdp_pkt); /* Free xdp_pkt */
 		}
+		processed++;
 	}
 	bq->count = 0;
 	spin_unlock(&q->producer_lock);
 
+	/* Feedback loop via tracepoints */
+	trace_xdp_cpumap_enqueue(rcpu->map_id, processed, drops, to_cpu);
 	return 0;
 }
--
cgit v1.2.3-59-g8ed1b
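
For the debugging/monitoring use-case mentioned in the commit message, the new
events can be consumed from user space. The following is a minimal sketch, not
part of this patch: it assumes tracefs is mounted at
/sys/kernel/debug/tracing and that the tracepoints are exported under the
"xdp" event group, as the trace_xdp_cpumap_*() call sites above suggest.

/* Hypothetical user-space consumer (illustration only): enable the two
 * cpumap tracepoints via tracefs and stream the formatted events. Paths
 * and the "xdp" group name are assumptions, not shown in this patch.
 */
#include <stdio.h>
#include <stdlib.h>

#define TRACEFS "/sys/kernel/debug/tracing"

static int write_file(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fputs(val, f);
	fclose(f);
	return 0;
}

int main(void)
{
	char line[1024];
	FILE *pipe;

	/* Enable both cpumap tracepoints added by this patch */
	if (write_file(TRACEFS "/events/xdp/xdp_cpumap_enqueue/enable", "1") ||
	    write_file(TRACEFS "/events/xdp/xdp_cpumap_kthread/enable", "1")) {
		perror("enable tracepoints");
		return EXIT_FAILURE;
	}

	/* Stream formatted events; each record carries the map_id, processed
	 * and drops counters passed at the call sites above.
	 */
	pipe = fopen(TRACEFS "/trace_pipe", "r");
	if (!pipe) {
		perror("open trace_pipe");
		return EXIT_FAILURE;
	}
	while (fgets(line, sizeof(line), pipe))
		fputs(line, stdout);

	fclose(pipe);
	return 0;
}

The same data could likely also be captured with perf, e.g.
"perf record -e xdp:xdp_cpumap_enqueue -e xdp:xdp_cpumap_kthread", again
assuming those event names.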