Diffstat (limited to 'include/linux/sched.h')
-rw-r--r--  include/linux/sched.h  197
1 file changed, 163 insertions, 34 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 32813c345115..e12b524426b0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -42,6 +42,7 @@ struct backing_dev_info;
struct bio_list;
struct blk_plug;
struct bpf_local_storage;
+struct bpf_run_ctx;
struct capture_control;
struct cfs_rq;
struct fs_struct;
@@ -95,7 +96,9 @@ struct task_group;
#define TASK_WAKING 0x0200
#define TASK_NOLOAD 0x0400
#define TASK_NEW 0x0800
-#define TASK_STATE_MAX 0x1000
+/* RT specific auxiliary flag to mark RT lock waiters */
+#define TASK_RTLOCK_WAIT 0x1000
+#define TASK_STATE_MAX 0x2000
/* Convenience macros for the sake of set_current_state: */
#define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
@@ -113,13 +116,13 @@ struct task_group;
__TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \
TASK_PARKED)
-#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0)
+#define task_is_running(task) (READ_ONCE((task)->__state) == TASK_RUNNING)
-#define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0)
+#define task_is_traced(task) ((READ_ONCE(task->__state) & __TASK_TRACED) != 0)
-#define task_is_stopped_or_traced(task) ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
+#define task_is_stopped(task) ((READ_ONCE(task->__state) & __TASK_STOPPED) != 0)
-#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+#define task_is_stopped_or_traced(task) ((READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED)) != 0)
/*
* Special states are those that do not use the normal wait-loop pattern. See
@@ -128,30 +131,37 @@ struct task_group;
#define is_special_task_state(state) \
((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | TASK_DEAD))
-#define __set_current_state(state_value) \
- do { \
- WARN_ON_ONCE(is_special_task_state(state_value));\
- current->task_state_change = _THIS_IP_; \
- current->state = (state_value); \
- } while (0)
-
-#define set_current_state(state_value) \
- do { \
- WARN_ON_ONCE(is_special_task_state(state_value));\
- current->task_state_change = _THIS_IP_; \
- smp_store_mb(current->state, (state_value)); \
+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+# define debug_normal_state_change(state_value) \
+ do { \
+ WARN_ON_ONCE(is_special_task_state(state_value)); \
+ current->task_state_change = _THIS_IP_; \
} while (0)
-#define set_special_state(state_value) \
+# define debug_special_state_change(state_value) \
do { \
- unsigned long flags; /* may shadow */ \
WARN_ON_ONCE(!is_special_task_state(state_value)); \
- raw_spin_lock_irqsave(&current->pi_lock, flags); \
current->task_state_change = _THIS_IP_; \
- current->state = (state_value); \
- raw_spin_unlock_irqrestore(&current->pi_lock, flags); \
} while (0)
+
+# define debug_rtlock_wait_set_state() \
+ do { \
+ current->saved_state_change = current->task_state_change;\
+ current->task_state_change = _THIS_IP_; \
+ } while (0)
+
+# define debug_rtlock_wait_restore_state() \
+ do { \
+ current->task_state_change = current->saved_state_change;\
+ } while (0)
+
#else
+# define debug_normal_state_change(cond) do { } while (0)
+# define debug_special_state_change(cond) do { } while (0)
+# define debug_rtlock_wait_set_state() do { } while (0)
+# define debug_rtlock_wait_restore_state() do { } while (0)
+#endif
+
/*
* set_current_state() includes a barrier so that the write of current->state
* is correctly serialised wrt the caller's subsequent test of whether to
@@ -190,26 +200,79 @@ struct task_group;
* Also see the comments of try_to_wake_up().
*/
#define __set_current_state(state_value) \
- current->state = (state_value)
+ do { \
+ debug_normal_state_change((state_value)); \
+ WRITE_ONCE(current->__state, (state_value)); \
+ } while (0)
#define set_current_state(state_value) \
- smp_store_mb(current->state, (state_value))
+ do { \
+ debug_normal_state_change((state_value)); \
+ smp_store_mb(current->__state, (state_value)); \
+ } while (0)
/*
* set_special_state() should be used for those states when the blocking task
* can not use the regular condition based wait-loop. In that case we must
- * serialize against wakeups such that any possible in-flight TASK_RUNNING stores
- * will not collide with our state change.
+ * serialize against wakeups such that any possible in-flight TASK_RUNNING
+ * stores will not collide with our state change.
*/
#define set_special_state(state_value) \
do { \
unsigned long flags; /* may shadow */ \
+ \
raw_spin_lock_irqsave(&current->pi_lock, flags); \
- current->state = (state_value); \
+ debug_special_state_change((state_value)); \
+ WRITE_ONCE(current->__state, (state_value)); \
raw_spin_unlock_irqrestore(&current->pi_lock, flags); \
} while (0)
-#endif
+/*
+ * PREEMPT_RT specific variants for "sleeping" spin/rwlocks
+ *
+ * RT's spin/rwlock substitutions are state preserving. The state of the
+ * task when blocking on the lock is saved in task_struct::saved_state and
+ * restored after the lock has been acquired. These operations are
+ * serialized by task_struct::pi_lock against try_to_wake_up(). Any non RT
+ * lock related wakeups while the task is blocked on the lock are
+ * redirected to operate on task_struct::saved_state to ensure that these
+ * are not dropped. On restore task_struct::saved_state is set to
+ * TASK_RUNNING so any wakeup attempt redirected to saved_state will fail.
+ *
+ * The lock operation looks like this:
+ *
+ * current_save_and_set_rtlock_wait_state();
+ * for (;;) {
+ * if (try_lock())
+ * break;
+ * raw_spin_unlock_irq(&lock->wait_lock);
+ * schedule_rtlock();
+ * raw_spin_lock_irq(&lock->wait_lock);
+ * set_current_state(TASK_RTLOCK_WAIT);
+ * }
+ * current_restore_rtlock_saved_state();
+ */
+#define current_save_and_set_rtlock_wait_state() \
+ do { \
+ lockdep_assert_irqs_disabled(); \
+ raw_spin_lock(&current->pi_lock); \
+ current->saved_state = current->__state; \
+ debug_rtlock_wait_set_state(); \
+ WRITE_ONCE(current->__state, TASK_RTLOCK_WAIT); \
+ raw_spin_unlock(&current->pi_lock); \
+ } while (0);
+
+#define current_restore_rtlock_saved_state() \
+ do { \
+ lockdep_assert_irqs_disabled(); \
+ raw_spin_lock(&current->pi_lock); \
+ debug_rtlock_wait_restore_state(); \
+ WRITE_ONCE(current->__state, current->saved_state); \
+ current->saved_state = TASK_RUNNING; \
+ raw_spin_unlock(&current->pi_lock); \
+ } while (0);
+
+#define get_current_state() READ_ONCE(current->__state)
/* Task command name length: */
#define TASK_COMM_LEN 16
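
For reference, the condition-based wait loop that set_current_state() and __set_current_state() serve looks roughly like the sketch below; CONDITION is a stand-in for whatever the caller is waiting on, not a real symbol, and wait-queue bookkeeping is omitted:

	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (CONDITION)
			break;
		schedule();
	}
	__set_current_state(TASK_RUNNING);

The smp_store_mb() in set_current_state() orders the state store against the subsequent CONDITION load, pairing with the waker side setting the condition before issuing the wakeup.
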
@@ -226,6 +289,9 @@ extern long schedule_timeout_idle(long timeout);
asmlinkage void schedule(void);
extern void schedule_preempt_disabled(void);
asmlinkage void preempt_schedule_irq(void);
+#ifdef CONFIG_PREEMPT_RT
+ extern void schedule_rtlock(void);
+#endif
extern int __must_check io_schedule_prepare(void);
extern void io_schedule_finish(int token);
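
With schedule_rtlock() declared here, the PREEMPT_RT lock loop documented earlier in this patch can be restated as a function. This is only an illustrative sketch (struct example_rtlock, its wait_lock and example_trylock() are hypothetical); the real slowpath lives in kernel/locking/rtmutex.c:

	static void example_rtlock_slowlock(struct example_rtlock *lock)
	{
		/* Disables interrupts; wait_lock serializes the waiters. */
		raw_spin_lock_irq(&lock->wait_lock);
		current_save_and_set_rtlock_wait_state();
		for (;;) {
			if (example_trylock(lock))
				break;
			raw_spin_unlock_irq(&lock->wait_lock);
			schedule_rtlock();
			raw_spin_lock_irq(&lock->wait_lock);
			set_current_state(TASK_RTLOCK_WAIT);
		}
		current_restore_rtlock_saved_state();
		raw_spin_unlock_irq(&lock->wait_lock);
	}
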
@@ -662,8 +728,12 @@ struct task_struct {
*/
struct thread_info thread_info;
#endif
- /* -1 unrunnable, 0 runnable, >0 stopped: */
- volatile long state;
+ unsigned int __state;
+
+#ifdef CONFIG_PREEMPT_RT
+ /* saved state for "spinlock sleepers" */
+ unsigned int saved_state;
+#endif
/*
* This begins the randomizable portion of task_struct. Only
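
The task_struct changes above add the saved_state field that the earlier comment block relies on: wakeups arriving while __state is TASK_RTLOCK_WAIT must be applied to saved_state so they are not lost. A hypothetical, simplified sketch of that redirection (the real handling sits in try_to_wake_up() under p->pi_lock and is more involved):

	/* Hypothetical helper, not the actual wakeup code; caller holds p->pi_lock. */
	static bool example_wakeup_state_match(struct task_struct *p, unsigned int state)
	{
		if (READ_ONCE(p->__state) & state)
			return true;		/* wake the task normally */
	#ifdef CONFIG_PREEMPT_RT
		if (p->saved_state & state) {
			/*
			 * The task is blocked on an RT lock: record the wakeup
			 * in saved_state; the task keeps sleeping until the
			 * lock wakeup arrives and restores saved_state.
			 */
			p->saved_state = TASK_RUNNING;
		}
	#endif
		return false;
	}
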
@@ -708,10 +778,17 @@ struct task_struct {
const struct sched_class *sched_class;
struct sched_entity se;
struct sched_rt_entity rt;
+ struct sched_dl_entity dl;
+
+#ifdef CONFIG_SCHED_CORE
+ struct rb_node core_node;
+ unsigned long core_cookie;
+ unsigned int core_occupation;
+#endif
+
#ifdef CONFIG_CGROUP_SCHED
struct task_group *sched_task_group;
#endif
- struct sched_dl_entity dl;
#ifdef CONFIG_UCLAMP_TASK
/*
@@ -738,6 +815,7 @@ struct task_struct {
unsigned int policy;
int nr_cpus_allowed;
const cpumask_t *cpus_ptr;
+ cpumask_t *user_cpus_ptr;
cpumask_t cpus_mask;
void *migration_pending;
#ifdef CONFIG_SMP
@@ -853,6 +931,10 @@ struct task_struct {
/* Used by page_owner=on to detect recursion in page tracking. */
unsigned in_page_owner:1;
#endif
+#ifdef CONFIG_EVENTFD
+ /* Recursion prevention for eventfd_signal() */
+ unsigned in_eventfd_signal:1;
+#endif
unsigned long atomic_flags; /* Flags requiring atomic access. */
@@ -1347,6 +1429,9 @@ struct task_struct {
struct kmap_ctrl kmap_ctrl;
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
unsigned long task_state_change;
+# ifdef CONFIG_PREEMPT_RT
+ unsigned long saved_state_change;
+# endif
#endif
int pagefault_disabled;
#ifdef CONFIG_MMU
@@ -1369,6 +1454,8 @@ struct task_struct {
#ifdef CONFIG_BPF_SYSCALL
/* Used by BPF task local storage */
struct bpf_local_storage __rcu *bpf_storage;
+ /* Used for BPF run context */
+ struct bpf_run_ctx *bpf_ctx;
#endif
#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
@@ -1390,6 +1477,16 @@ struct task_struct {
struct llist_head kretprobe_instances;
#endif
+#ifdef CONFIG_ARCH_HAS_PARANOID_L1D_FLUSH
+ /*
+ * If L1D flush is supported on mm context switch
+ * then we use this callback head to queue kill work
+ * to kill tasks that are not running on SMT disabled
+ * cores
+ */
+ struct callback_head l1d_flush_kill;
+#endif
+
/*
* New fields for task_struct should be added above here, so that
* they are included in the randomized portion of task_struct.
@@ -1520,7 +1617,7 @@ static inline pid_t task_pgrp_nr(struct task_struct *tsk)
static inline unsigned int task_state_index(struct task_struct *tsk)
{
- unsigned int tsk_state = READ_ONCE(tsk->state);
+ unsigned int tsk_state = READ_ONCE(tsk->__state);
unsigned int state = (tsk_state | tsk->exit_state) & TASK_REPORT;
BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX);
@@ -1695,6 +1792,11 @@ extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_
#ifdef CONFIG_SMP
extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask);
extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask);
+extern int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node);
+extern void release_user_cpus_ptr(struct task_struct *p);
+extern int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask);
+extern void force_compatible_cpus_allowed_ptr(struct task_struct *p);
+extern void relax_compatible_cpus_allowed_ptr(struct task_struct *p);
#else
static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
{
@@ -1705,6 +1807,21 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpuma
return -EINVAL;
return 0;
}
+static inline int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node)
+{
+ if (src->user_cpus_ptr)
+ return -EINVAL;
+ return 0;
+}
+static inline void release_user_cpus_ptr(struct task_struct *p)
+{
+ WARN_ON(p->user_cpus_ptr);
+}
+
+static inline int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask)
+{
+ return 0;
+}
#endif
extern int yield_to(struct task_struct *p, bool preempt);
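
The !CONFIG_SMP stubs above complete the user_cpus_ptr API: the force/relax pair lets the kernel temporarily narrow a task's affinity and later restore the mask the user last requested. A hypothetical sketch of the intended pairing (example_restrict_affinity() is illustrative only; the in-tree callers are arch code handling asymmetric CPU capabilities):

	static void example_restrict_affinity(struct task_struct *p)
	{
		/* Narrow cpus_mask to the subset of CPUs the task may use now. */
		force_compatible_cpus_allowed_ptr(p);

		/* ... the task runs with the restricted affinity ... */

		/* Restore the user-requested mask preserved in p->user_cpus_ptr. */
		relax_compatible_cpus_allowed_ptr(p);
	}
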
@@ -1828,10 +1945,10 @@ static __always_inline void scheduler_ipi(void)
*/
preempt_fold_need_resched();
}
-extern unsigned long wait_task_inactive(struct task_struct *, long match_state);
+extern unsigned long wait_task_inactive(struct task_struct *, unsigned int match_state);
#else
static inline void scheduler_ipi(void) { }
-static inline unsigned long wait_task_inactive(struct task_struct *p, long match_state)
+static inline unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state)
{
return 1;
}
@@ -2018,6 +2135,8 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
#endif /* CONFIG_SMP */
+extern bool sched_task_on_rq(struct task_struct *p);
+
/*
* In order to reduce various lock holder preemption latencies provide an
* interface to see if a vCPU is currently running or not.
@@ -2179,4 +2298,14 @@ int sched_trace_rq_nr_running(struct rq *rq);
const struct cpumask *sched_trace_rd_span(struct root_domain *rd);
+#ifdef CONFIG_SCHED_CORE
+extern void sched_core_free(struct task_struct *tsk);
+extern void sched_core_fork(struct task_struct *p);
+extern int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type,
+ unsigned long uaddr);
+#else
+static inline void sched_core_free(struct task_struct *tsk) { }
+static inline void sched_core_fork(struct task_struct *p) { }
+#endif
+
#endif
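
The sched_core_share_pid() hook above is the kernel side of the PR_SCHED_CORE prctl() that ships with core scheduling. A minimal userspace sketch, assuming a kernel built with CONFIG_SCHED_CORE and uapi headers that provide the PR_SCHED_CORE constants:

	#include <stdio.h>
	#include <sys/prctl.h>
	#include <linux/prctl.h>

	int main(void)
	{
		/*
		 * Create a core-scheduling cookie for the calling thread so it
		 * only shares an SMT core with tasks carrying the same cookie.
		 */
		if (prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, 0,
			  PR_SCHED_CORE_SCOPE_THREAD, 0))
			perror("PR_SCHED_CORE_CREATE");
		return 0;
	}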