diff options
Diffstat (limited to 'include/trace/events/sched.h')
-rw-r--r-- | include/trace/events/sched.h | 416 |
1 files changed, 332 insertions, 84 deletions
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index fec25b9cfbaf..4e6b2910cec3 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -5,6 +5,7 @@ #if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_SCHED_H +#include <linux/kthread.h> #include <linux/sched/numa_balancing.h> #include <linux/tracepoint.h> #include <linux/binfmts.h> @@ -19,16 +20,16 @@ TRACE_EVENT(sched_kthread_stop, TP_ARGS(t), TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) + __string( comm, t->comm ) + __field( pid_t, pid ) ), TP_fast_assign( - memcpy(__entry->comm, t->comm, TASK_COMM_LEN); + __assign_str(comm); __entry->pid = t->pid; ), - TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid) + TP_printk("comm=%s pid=%d", __get_str(comm), __entry->pid) ); /* @@ -51,6 +52,89 @@ TRACE_EVENT(sched_kthread_stop_ret, TP_printk("ret=%d", __entry->ret) ); +/** + * sched_kthread_work_queue_work - called when a work gets queued + * @worker: pointer to the kthread_worker + * @work: pointer to struct kthread_work + * + * This event occurs when a work is queued immediately or once a + * delayed work is actually queued (ie: once the delay has been + * reached). + */ +TRACE_EVENT(sched_kthread_work_queue_work, + + TP_PROTO(struct kthread_worker *worker, + struct kthread_work *work), + + TP_ARGS(worker, work), + + TP_STRUCT__entry( + __field( void *, work ) + __field( void *, function) + __field( void *, worker) + ), + + TP_fast_assign( + __entry->work = work; + __entry->function = work->func; + __entry->worker = worker; + ), + + TP_printk("work struct=%p function=%ps worker=%p", + __entry->work, __entry->function, __entry->worker) +); + +/** + * sched_kthread_work_execute_start - called immediately before the work callback + * @work: pointer to struct kthread_work + * + * Allows to track kthread work execution. + */ +TRACE_EVENT(sched_kthread_work_execute_start, + + TP_PROTO(struct kthread_work *work), + + TP_ARGS(work), + + TP_STRUCT__entry( + __field( void *, work ) + __field( void *, function) + ), + + TP_fast_assign( + __entry->work = work; + __entry->function = work->func; + ), + + TP_printk("work struct %p: function %ps", __entry->work, __entry->function) +); + +/** + * sched_kthread_work_execute_end - called immediately after the work callback + * @work: pointer to struct work_struct + * @function: pointer to worker function + * + * Allows to track workqueue execution. + */ +TRACE_EVENT(sched_kthread_work_execute_end, + + TP_PROTO(struct kthread_work *work, kthread_work_func_t function), + + TP_ARGS(work, function), + + TP_STRUCT__entry( + __field( void *, work ) + __field( void *, function) + ), + + TP_fast_assign( + __entry->work = work; + __entry->function = function; + ), + + TP_printk("work struct %p: function %ps", __entry->work, __entry->function) +); + /* * Tracepoint for waking up a task: */ @@ -64,7 +148,6 @@ DECLARE_EVENT_CLASS(sched_wakeup_template, __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field( int, prio ) - __field( int, success ) __field( int, target_cpu ) ), @@ -72,7 +155,6 @@ DECLARE_EVENT_CLASS(sched_wakeup_template, memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ - __entry->success = 1; /* rudiment, kill when possible */ __entry->target_cpu = task_cpu(p); ), @@ -90,7 +172,7 @@ DEFINE_EVENT(sched_wakeup_template, sched_waking, TP_ARGS(p)); /* - * Tracepoint called when the task is actually woken; p->state == TASK_RUNNNG. + * Tracepoint called when the task is actually woken; p->state == TASK_RUNNING. * It is not always called from the waking context. */ DEFINE_EVENT(sched_wakeup_template, sched_wakeup, @@ -105,13 +187,13 @@ DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new, TP_ARGS(p)); #ifdef CREATE_TRACE_POINTS -static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p) +static inline long __trace_sched_switch_state(bool preempt, + unsigned int prev_state, + struct task_struct *p) { unsigned int state; -#ifdef CONFIG_SCHED_DEBUG BUG_ON(p != current); -#endif /* CONFIG_SCHED_DEBUG */ /* * Preemption ignores task state, therefore preempted tasks are always @@ -126,7 +208,7 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct * * it for left shift operation to get the correct task->state * mapping. */ - state = task_state_index(p); + state = __task_state_index(prev_state, p->exit_state); return state ? (1 << (state - 1)) : state; } @@ -139,9 +221,10 @@ TRACE_EVENT(sched_switch, TP_PROTO(bool preempt, struct task_struct *prev, - struct task_struct *next), + struct task_struct *next, + unsigned int prev_state), - TP_ARGS(preempt, prev, next), + TP_ARGS(preempt, prev, next, prev_state), TP_STRUCT__entry( __array( char, prev_comm, TASK_COMM_LEN ) @@ -154,11 +237,11 @@ TRACE_EVENT(sched_switch, ), TP_fast_assign( - memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); + memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); __entry->prev_pid = prev->pid; __entry->prev_prio = prev->prio; - __entry->prev_state = __trace_sched_switch_state(preempt, prev); - memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); + __entry->prev_state = __trace_sched_switch_state(preempt, prev_state, prev); + memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); __entry->next_pid = next->pid; __entry->next_prio = next->prio; /* XXX SCHED_DEADLINE */ @@ -193,15 +276,15 @@ TRACE_EVENT(sched_migrate_task, TP_ARGS(p, dest_cpu), TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - __field( int, orig_cpu ) - __field( int, dest_cpu ) + __string( comm, p->comm ) + __field( pid_t, pid ) + __field( int, prio ) + __field( int, orig_cpu ) + __field( int, dest_cpu ) ), TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __assign_str(comm); __entry->pid = p->pid; __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ __entry->orig_cpu = task_cpu(p); @@ -209,7 +292,7 @@ TRACE_EVENT(sched_migrate_task, ), TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d", - __entry->comm, __entry->pid, __entry->prio, + __get_str(comm), __entry->pid, __entry->prio, __entry->orig_cpu, __entry->dest_cpu) ); @@ -220,19 +303,19 @@ DECLARE_EVENT_CLASS(sched_process_template, TP_ARGS(p), TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) + __string( comm, p->comm ) + __field( pid_t, pid ) + __field( int, prio ) ), TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __assign_str(comm); __entry->pid = p->pid; __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ ), TP_printk("comm=%s pid=%d prio=%d", - __entry->comm, __entry->pid, __entry->prio) + __get_str(comm), __entry->pid, __entry->prio) ); /* @@ -243,11 +326,37 @@ DEFINE_EVENT(sched_process_template, sched_process_free, TP_ARGS(p)); /* - * Tracepoint for a task exiting: + * Tracepoint for a task exiting. + * Note, it's a superset of sched_process_template and should be kept + * compatible as much as possible. sched_process_exits has an extra + * `group_dead` argument, so sched_process_template can't be used, + * unfortunately, just like sched_migrate_task above. */ -DEFINE_EVENT(sched_process_template, sched_process_exit, - TP_PROTO(struct task_struct *p), - TP_ARGS(p)); +TRACE_EVENT(sched_process_exit, + + TP_PROTO(struct task_struct *p, bool group_dead), + + TP_ARGS(p, group_dead), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + __field( bool, group_dead ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ + __entry->group_dead = group_dead; + ), + + TP_printk("comm=%s pid=%d prio=%d group_dead=%s", + __entry->comm, __entry->pid, __entry->prio, + __entry->group_dead ? "true" : "false" + ) +); /* * Tracepoint for waiting on task to unschedule: @@ -266,23 +375,23 @@ TRACE_EVENT(sched_process_wait, TP_ARGS(pid), TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) + __string( comm, current->comm ) __field( pid_t, pid ) __field( int, prio ) ), TP_fast_assign( - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + __assign_str(comm); __entry->pid = pid_nr(pid); __entry->prio = current->prio; /* XXX SCHED_DEADLINE */ ), TP_printk("comm=%s pid=%d prio=%d", - __entry->comm, __entry->pid, __entry->prio) + __get_str(comm), __entry->pid, __entry->prio) ); /* - * Tracepoint for do_fork: + * Tracepoint for kernel_clone: */ TRACE_EVENT(sched_process_fork, @@ -291,22 +400,22 @@ TRACE_EVENT(sched_process_fork, TP_ARGS(parent, child), TP_STRUCT__entry( - __array( char, parent_comm, TASK_COMM_LEN ) - __field( pid_t, parent_pid ) - __array( char, child_comm, TASK_COMM_LEN ) - __field( pid_t, child_pid ) + __string( parent_comm, parent->comm ) + __field( pid_t, parent_pid ) + __string( child_comm, child->comm ) + __field( pid_t, child_pid ) ), TP_fast_assign( - memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN); + __assign_str(parent_comm); __entry->parent_pid = parent->pid; - memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN); + __assign_str(child_comm); __entry->child_pid = child->pid; ), TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d", - __entry->parent_comm, __entry->parent_pid, - __entry->child_comm, __entry->child_pid) + __get_str(parent_comm), __entry->parent_pid, + __get_str(child_comm), __entry->child_pid) ); /* @@ -326,7 +435,7 @@ TRACE_EVENT(sched_process_exec, ), TP_fast_assign( - __assign_str(filename, bprm->filename); + __assign_str(filename); __entry->pid = p->pid; __entry->old_pid = old_pid; ), @@ -335,6 +444,41 @@ TRACE_EVENT(sched_process_exec, __entry->pid, __entry->old_pid) ); +/** + * sched_prepare_exec - called before setting up new exec + * @task: pointer to the current task + * @bprm: pointer to linux_binprm used for new exec + * + * Called before flushing the old exec, where @task is still unchanged, but at + * the point of no return during switching to the new exec. At the point it is + * called the exec will either succeed, or on failure terminate the task. Also + * see the "sched_process_exec" tracepoint, which is called right after @task + * has successfully switched to the new exec. + */ +TRACE_EVENT(sched_prepare_exec, + + TP_PROTO(struct task_struct *task, struct linux_binprm *bprm), + + TP_ARGS(task, bprm), + + TP_STRUCT__entry( + __string( interp, bprm->interp ) + __string( filename, bprm->filename ) + __field( pid_t, pid ) + __string( comm, task->comm ) + ), + + TP_fast_assign( + __assign_str(interp); + __assign_str(filename); + __entry->pid = task->pid; + __assign_str(comm); + ), + + TP_printk("interp=%s filename=%s pid=%d comm=%s", + __get_str(interp), __get_str(filename), + __entry->pid, __get_str(comm)) +); #ifdef CONFIG_SCHEDSTATS #define DEFINE_EVENT_SCHEDSTAT DEFINE_EVENT @@ -355,19 +499,19 @@ DECLARE_EVENT_CLASS_SCHEDSTAT(sched_stat_template, TP_ARGS(__perf_task(tsk), __perf_count(delay)), TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( u64, delay ) + __string( comm, tsk->comm ) + __field( pid_t, pid ) + __field( u64, delay ) ), TP_fast_assign( - memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __assign_str(comm); __entry->pid = tsk->pid; __entry->delay = delay; ), TP_printk("comm=%s pid=%d delay=%Lu [ns]", - __entry->comm, __entry->pid, + __get_str(comm), __entry->pid, (unsigned long long)__entry->delay) ); @@ -408,33 +552,30 @@ DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_blocked, */ DECLARE_EVENT_CLASS(sched_stat_runtime, - TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime), + TP_PROTO(struct task_struct *tsk, u64 runtime), - TP_ARGS(tsk, __perf_count(runtime), vruntime), + TP_ARGS(tsk, __perf_count(runtime)), TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( u64, runtime ) - __field( u64, vruntime ) + __string( comm, tsk->comm ) + __field( pid_t, pid ) + __field( u64, runtime ) ), TP_fast_assign( - memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __assign_str(comm); __entry->pid = tsk->pid; __entry->runtime = runtime; - __entry->vruntime = vruntime; ), - TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]", - __entry->comm, __entry->pid, - (unsigned long long)__entry->runtime, - (unsigned long long)__entry->vruntime) + TP_printk("comm=%s pid=%d runtime=%Lu [ns]", + __get_str(comm), __entry->pid, + (unsigned long long)__entry->runtime) ); DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime, - TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime), - TP_ARGS(tsk, runtime, vruntime)); + TP_PROTO(struct task_struct *tsk, u64 runtime), + TP_ARGS(tsk, runtime)); /* * Tracepoint for showing priority inheritance modifying a tasks @@ -447,14 +588,14 @@ TRACE_EVENT(sched_pi_setprio, TP_ARGS(tsk, pi_task), TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, oldprio ) - __field( int, newprio ) + __string( comm, tsk->comm ) + __field( pid_t, pid ) + __field( int, oldprio ) + __field( int, newprio ) ), TP_fast_assign( - memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __assign_str(comm); __entry->pid = tsk->pid; __entry->oldprio = tsk->prio; __entry->newprio = pi_task ? @@ -464,7 +605,7 @@ TRACE_EVENT(sched_pi_setprio, ), TP_printk("comm=%s pid=%d oldprio=%d newprio=%d", - __entry->comm, __entry->pid, + __get_str(comm), __entry->pid, __entry->oldprio, __entry->newprio) ); @@ -474,16 +615,16 @@ TRACE_EVENT(sched_process_hang, TP_ARGS(tsk), TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) + __string( comm, tsk->comm ) + __field( pid_t, pid ) ), TP_fast_assign( - memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __assign_str(comm); __entry->pid = tsk->pid; ), - TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid) + TP_printk("comm=%s pid=%d", __get_str(comm), __entry->pid) ); #endif /* CONFIG_DETECT_HUNG_TASK */ @@ -579,6 +720,91 @@ DEFINE_EVENT(sched_numa_pair_template, sched_swap_numa, TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu) ); +#ifdef CONFIG_NUMA_BALANCING +#define NUMAB_SKIP_REASON \ + EM( NUMAB_SKIP_UNSUITABLE, "unsuitable" ) \ + EM( NUMAB_SKIP_SHARED_RO, "shared_ro" ) \ + EM( NUMAB_SKIP_INACCESSIBLE, "inaccessible" ) \ + EM( NUMAB_SKIP_SCAN_DELAY, "scan_delay" ) \ + EM( NUMAB_SKIP_PID_INACTIVE, "pid_inactive" ) \ + EM( NUMAB_SKIP_IGNORE_PID, "ignore_pid_inactive" ) \ + EMe(NUMAB_SKIP_SEQ_COMPLETED, "seq_completed" ) + +/* Redefine for export. */ +#undef EM +#undef EMe +#define EM(a, b) TRACE_DEFINE_ENUM(a); +#define EMe(a, b) TRACE_DEFINE_ENUM(a); + +NUMAB_SKIP_REASON + +/* Redefine for symbolic printing. */ +#undef EM +#undef EMe +#define EM(a, b) { a, b }, +#define EMe(a, b) { a, b } + +TRACE_EVENT(sched_skip_vma_numa, + + TP_PROTO(struct mm_struct *mm, struct vm_area_struct *vma, + enum numa_vmaskip_reason reason), + + TP_ARGS(mm, vma, reason), + + TP_STRUCT__entry( + __field(unsigned long, numa_scan_offset) + __field(unsigned long, vm_start) + __field(unsigned long, vm_end) + __field(enum numa_vmaskip_reason, reason) + ), + + TP_fast_assign( + __entry->numa_scan_offset = mm->numa_scan_offset; + __entry->vm_start = vma->vm_start; + __entry->vm_end = vma->vm_end; + __entry->reason = reason; + ), + + TP_printk("numa_scan_offset=%lX vm_start=%lX vm_end=%lX reason=%s", + __entry->numa_scan_offset, + __entry->vm_start, + __entry->vm_end, + __print_symbolic(__entry->reason, NUMAB_SKIP_REASON)) +); + +TRACE_EVENT(sched_skip_cpuset_numa, + + TP_PROTO(struct task_struct *tsk, nodemask_t *mem_allowed_ptr), + + TP_ARGS(tsk, mem_allowed_ptr), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( pid_t, tgid ) + __field( pid_t, ngid ) + __array( unsigned long, mem_allowed, BITS_TO_LONGS(MAX_NUMNODES)) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = task_pid_nr(tsk); + __entry->tgid = task_tgid_nr(tsk); + __entry->ngid = task_numa_group_id(tsk); + BUILD_BUG_ON(sizeof(nodemask_t) != \ + BITS_TO_LONGS(MAX_NUMNODES) * sizeof(long)); + memcpy(__entry->mem_allowed, mem_allowed_ptr->bits, + sizeof(__entry->mem_allowed)); + ), + + TP_printk("comm=%s pid=%d tgid=%d ngid=%d mem_nodes_allowed=%*pbl", + __entry->comm, + __entry->pid, + __entry->tgid, + __entry->ngid, + MAX_NUMNODES, __entry->mem_allowed) +); +#endif /* CONFIG_NUMA_BALANCING */ /* * Tracepoint for waking a polling cpu without an IPI. @@ -606,46 +832,68 @@ TRACE_EVENT(sched_wake_idle_without_ipi, * * Postfixed with _tp to make them easily identifiable in the code. */ -DECLARE_TRACE(pelt_cfs_tp, +DECLARE_TRACE(pelt_cfs, TP_PROTO(struct cfs_rq *cfs_rq), TP_ARGS(cfs_rq)); -DECLARE_TRACE(pelt_rt_tp, +DECLARE_TRACE(pelt_rt, TP_PROTO(struct rq *rq), TP_ARGS(rq)); -DECLARE_TRACE(pelt_dl_tp, +DECLARE_TRACE(pelt_dl, TP_PROTO(struct rq *rq), TP_ARGS(rq)); -DECLARE_TRACE(pelt_thermal_tp, +DECLARE_TRACE(pelt_hw, TP_PROTO(struct rq *rq), TP_ARGS(rq)); -DECLARE_TRACE(pelt_irq_tp, +DECLARE_TRACE(pelt_irq, TP_PROTO(struct rq *rq), TP_ARGS(rq)); -DECLARE_TRACE(pelt_se_tp, +DECLARE_TRACE(pelt_se, TP_PROTO(struct sched_entity *se), TP_ARGS(se)); -DECLARE_TRACE(sched_overutilized_tp, +DECLARE_TRACE(sched_cpu_capacity, + TP_PROTO(struct rq *rq), + TP_ARGS(rq)); + +DECLARE_TRACE(sched_overutilized, TP_PROTO(struct root_domain *rd, bool overutilized), TP_ARGS(rd, overutilized)); -DECLARE_TRACE(sched_util_est_cfs_tp, +DECLARE_TRACE(sched_util_est_cfs, TP_PROTO(struct cfs_rq *cfs_rq), TP_ARGS(cfs_rq)); -DECLARE_TRACE(sched_util_est_se_tp, +DECLARE_TRACE(sched_util_est_se, TP_PROTO(struct sched_entity *se), TP_ARGS(se)); -DECLARE_TRACE(sched_update_nr_running_tp, +DECLARE_TRACE(sched_update_nr_running, TP_PROTO(struct rq *rq, int change), TP_ARGS(rq, change)); +DECLARE_TRACE(sched_compute_energy, + TP_PROTO(struct task_struct *p, int dst_cpu, unsigned long energy, + unsigned long max_util, unsigned long busy_time), + TP_ARGS(p, dst_cpu, energy, max_util, busy_time)); + +DECLARE_TRACE(sched_entry, + TP_PROTO(bool preempt, unsigned long ip), + TP_ARGS(preempt, ip)); + +DECLARE_TRACE(sched_exit, + TP_PROTO(bool is_switch, unsigned long ip), + TP_ARGS(is_switch, ip)); + +DECLARE_TRACE_CONDITION(sched_set_state, + TP_PROTO(struct task_struct *tsk, int state), + TP_ARGS(tsk, state), + TP_CONDITION(!!(tsk->__state) != !!state)); + #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ |