/* Copyright (c) 2016 Facebook * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ #include #include #include #include #include #include #include #define _(P) ({typeof(P) val; bpf_probe_read(&val, sizeof(val), &P); val;}) #define MINBLOCK_US 1 struct key_t { char waker[TASK_COMM_LEN]; char target[TASK_COMM_LEN]; u32 wret; u32 tret; }; struct bpf_map_def SEC("maps") counts = { .type = BPF_MAP_TYPE_HASH, .key_size = sizeof(struct key_t), .value_size = sizeof(u64), .max_entries = 10000, }; struct bpf_map_def SEC("maps") start = { .type = BPF_MAP_TYPE_HASH, .key_size = sizeof(u32), .value_size = sizeof(u64), .max_entries = 10000, }; struct wokeby_t { char name[TASK_COMM_LEN]; u32 ret; }; struct bpf_map_def SEC("maps") wokeby = { .type = BPF_MAP_TYPE_HASH, .key_size = sizeof(u32), .value_size = sizeof(struct wokeby_t), .max_entries = 10000, }; struct bpf_map_def SEC("maps") stackmap = { .type = BPF_MAP_TYPE_STACK_TRACE, .key_size = sizeof(u32), .value_size = PERF_MAX_STACK_DEPTH * sizeof(u64), .max_entries = 10000, }; #define STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP) SEC("kprobe/try_to_wake_up") int waker(struct pt_regs *ctx) { struct task_struct *p = (void *) PT_REGS_PARM1(ctx); struct wokeby_t woke; u32 pid; pid = _(p->pid); bpf_get_current_comm(&woke.name, sizeof(woke.name)); woke.ret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS); bpf_map_update_elem(&wokeby, &pid, &woke, BPF_ANY); return 0; } static inline int update_counts(void *ctx, u32 pid, u64 delta) { struct wokeby_t *woke; u64 zero = 0, *val; struct key_t key; __builtin_memset(&key.waker, 0, sizeof(key.waker)); bpf_get_current_comm(&key.target, sizeof(key.target)); key.tret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS); key.wret = 0; woke = bpf_map_lookup_elem(&wokeby, &pid); if (woke) { key.wret = woke->ret; __builtin_memcpy(&key.waker, woke->name, sizeof(key.waker)); bpf_map_delete_elem(&wokeby, &pid); } val = bpf_map_lookup_elem(&counts, &key); if (!val) { bpf_map_update_elem(&counts, &key, &zero, BPF_NOEXIST); val = bpf_map_lookup_elem(&counts, &key); if (!val) return 0; } (*val) += delta; return 0; } #if 1 /* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ struct sched_switch_args { unsigned long long pad; char prev_comm[16]; int prev_pid; int prev_prio; long long prev_state; char next_comm[16]; int next_pid; int next_prio; }; SEC("tracepoint/sched/sched_switch") int oncpu(struct sched_switch_args *ctx) { /* record previous thread sleep time */ u32 pid = ctx->prev_pid; #else SEC("kprobe/finish_task_switch") int oncpu(struct pt_regs *ctx) { struct task_struct *p = (void *) PT_REGS_PARM1(ctx); /* record previous thread sleep time */ u32 pid = _(p->pid); #endif u64 delta, ts, *tsp; ts = bpf_ktime_get_ns(); bpf_map_update_elem(&start, &pid, &ts, BPF_ANY); /* calculate current thread's delta time */ pid = bpf_get_current_pid_tgid(); tsp = bpf_map_lookup_elem(&start, &pid); if (!tsp) /* missed start or filtered */ return 0; delta = bpf_ktime_get_ns() - *tsp; bpf_map_delete_elem(&start, &pid); delta = delta / 1000; if (delta < MINBLOCK_US) return 0; return update_counts(ctx, pid, delta); } char _license[] SEC("license") = "GPL"; u32 _version SEC("version") = LINUX_VERSION_CODE;