aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/asm-generic/cputime.h66
-rw-r--r--include/asm-generic/cputime_jiffies.h72
-rw-r--r--include/asm-generic/cputime_nsecs.h104
-rw-r--r--include/linux/aer.h4
-rw-r--r--include/linux/async.h10
-rw-r--r--include/linux/cgroup.h3
-rw-r--r--include/linux/clockchips.h9
-rw-r--r--include/linux/context_tracking.h28
-rw-r--r--include/linux/elevator.h5
-rw-r--r--include/linux/ftrace.h6
-rw-r--r--include/linux/ftrace_event.h6
-rw-r--r--include/linux/hardirq.h8
-rw-r--r--include/linux/init.h1
-rw-r--r--include/linux/init_task.h12
-rw-r--r--include/linux/irq.h8
-rw-r--r--include/linux/irq_work.h22
-rw-r--r--include/linux/kernel_stat.h2
-rw-r--r--include/linux/kprobes.h12
-rw-r--r--include/linux/kvm_host.h55
-rw-r--r--include/linux/pci.h7
-rw-r--r--include/linux/perf_event.h20
-rw-r--r--include/linux/printk.h3
-rw-r--r--include/linux/profile.h13
-rw-r--r--include/linux/rcupdate.h15
-rw-r--r--include/linux/ring_buffer.h1
-rw-r--r--include/linux/rtc.h1
-rw-r--r--include/linux/sched.h188
-rw-r--r--include/linux/sched/rt.h58
-rw-r--r--include/linux/sched/sysctl.h110
-rw-r--r--include/linux/smpboot.h5
-rw-r--r--include/linux/srcu.h26
-rw-r--r--include/linux/tick.h17
-rw-r--r--include/linux/time.h13
-rw-r--r--include/linux/tsacct_kern.h3
-rw-r--r--include/linux/uprobes.h23
-rw-r--r--include/linux/vtime.h59
-rw-r--r--include/linux/workqueue.h35
-rw-r--r--include/trace/events/ras.h77
-rw-r--r--include/trace/events/rcu.h31
-rw-r--r--include/trace/events/workqueue.h10
-rw-r--r--include/uapi/linux/auto_fs.h25
-rw-r--r--include/uapi/linux/perf_event.h3
42 files changed, 817 insertions, 359 deletions
diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h
index 9a62937c56ca..51969436b8b8 100644
--- a/include/asm-generic/cputime.h
+++ b/include/asm-generic/cputime.h
@@ -4,66 +4,12 @@
#include <linux/time.h>
#include <linux/jiffies.h>
-typedef unsigned long __nocast cputime_t;
-
-#define cputime_one_jiffy jiffies_to_cputime(1)
-#define cputime_to_jiffies(__ct) (__force unsigned long)(__ct)
-#define cputime_to_scaled(__ct) (__ct)
-#define jiffies_to_cputime(__hz) (__force cputime_t)(__hz)
-
-typedef u64 __nocast cputime64_t;
-
-#define cputime64_to_jiffies64(__ct) (__force u64)(__ct)
-#define jiffies64_to_cputime64(__jif) (__force cputime64_t)(__jif)
-
-#define nsecs_to_cputime64(__ct) \
- jiffies64_to_cputime64(nsecs_to_jiffies64(__ct))
-
-
-/*
- * Convert cputime to microseconds and back.
- */
-#define cputime_to_usecs(__ct) \
- jiffies_to_usecs(cputime_to_jiffies(__ct))
-#define usecs_to_cputime(__usec) \
- jiffies_to_cputime(usecs_to_jiffies(__usec))
-#define usecs_to_cputime64(__usec) \
- jiffies64_to_cputime64(nsecs_to_jiffies64((__usec) * 1000))
-
-/*
- * Convert cputime to seconds and back.
- */
-#define cputime_to_secs(jif) (cputime_to_jiffies(jif) / HZ)
-#define secs_to_cputime(sec) jiffies_to_cputime((sec) * HZ)
-
-/*
- * Convert cputime to timespec and back.
- */
-#define timespec_to_cputime(__val) \
- jiffies_to_cputime(timespec_to_jiffies(__val))
-#define cputime_to_timespec(__ct,__val) \
- jiffies_to_timespec(cputime_to_jiffies(__ct),__val)
-
-/*
- * Convert cputime to timeval and back.
- */
-#define timeval_to_cputime(__val) \
- jiffies_to_cputime(timeval_to_jiffies(__val))
-#define cputime_to_timeval(__ct,__val) \
- jiffies_to_timeval(cputime_to_jiffies(__ct),__val)
-
-/*
- * Convert cputime to clock and back.
- */
-#define cputime_to_clock_t(__ct) \
- jiffies_to_clock_t(cputime_to_jiffies(__ct))
-#define clock_t_to_cputime(__x) \
- jiffies_to_cputime(clock_t_to_jiffies(__x))
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+# include <asm-generic/cputime_jiffies.h>
+#endif
-/*
- * Convert cputime64 to clock.
- */
-#define cputime64_to_clock_t(__ct) \
- jiffies_64_to_clock_t(cputime64_to_jiffies64(__ct))
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+# include <asm-generic/cputime_nsecs.h>
+#endif
#endif
diff --git a/include/asm-generic/cputime_jiffies.h b/include/asm-generic/cputime_jiffies.h
new file mode 100644
index 000000000000..272ecba9f588
--- /dev/null
+++ b/include/asm-generic/cputime_jiffies.h
@@ -0,0 +1,72 @@
+#ifndef _ASM_GENERIC_CPUTIME_JIFFIES_H
+#define _ASM_GENERIC_CPUTIME_JIFFIES_H
+
+typedef unsigned long __nocast cputime_t;
+
+#define cputime_one_jiffy jiffies_to_cputime(1)
+#define cputime_to_jiffies(__ct) (__force unsigned long)(__ct)
+#define cputime_to_scaled(__ct) (__ct)
+#define jiffies_to_cputime(__hz) (__force cputime_t)(__hz)
+
+typedef u64 __nocast cputime64_t;
+
+#define cputime64_to_jiffies64(__ct) (__force u64)(__ct)
+#define jiffies64_to_cputime64(__jif) (__force cputime64_t)(__jif)
+
+
+/*
+ * Convert nanoseconds to cputime
+ */
+#define nsecs_to_cputime64(__nsec) \
+ jiffies64_to_cputime64(nsecs_to_jiffies64(__nsec))
+#define nsecs_to_cputime(__nsec) \
+ jiffies_to_cputime(nsecs_to_jiffies(__nsec))
+
+
+/*
+ * Convert cputime to microseconds and back.
+ */
+#define cputime_to_usecs(__ct) \
+ jiffies_to_usecs(cputime_to_jiffies(__ct))
+#define usecs_to_cputime(__usec) \
+ jiffies_to_cputime(usecs_to_jiffies(__usec))
+#define usecs_to_cputime64(__usec) \
+ jiffies64_to_cputime64(nsecs_to_jiffies64((__usec) * 1000))
+
+/*
+ * Convert cputime to seconds and back.
+ */
+#define cputime_to_secs(jif) (cputime_to_jiffies(jif) / HZ)
+#define secs_to_cputime(sec) jiffies_to_cputime((sec) * HZ)
+
+/*
+ * Convert cputime to timespec and back.
+ */
+#define timespec_to_cputime(__val) \
+ jiffies_to_cputime(timespec_to_jiffies(__val))
+#define cputime_to_timespec(__ct,__val) \
+ jiffies_to_timespec(cputime_to_jiffies(__ct),__val)
+
+/*
+ * Convert cputime to timeval and back.
+ */
+#define timeval_to_cputime(__val) \
+ jiffies_to_cputime(timeval_to_jiffies(__val))
+#define cputime_to_timeval(__ct,__val) \
+ jiffies_to_timeval(cputime_to_jiffies(__ct),__val)
+
+/*
+ * Convert cputime to clock and back.
+ */
+#define cputime_to_clock_t(__ct) \
+ jiffies_to_clock_t(cputime_to_jiffies(__ct))
+#define clock_t_to_cputime(__x) \
+ jiffies_to_cputime(clock_t_to_jiffies(__x))
+
+/*
+ * Convert cputime64 to clock.
+ */
+#define cputime64_to_clock_t(__ct) \
+ jiffies_64_to_clock_t(cputime64_to_jiffies64(__ct))
+
+#endif
diff --git a/include/asm-generic/cputime_nsecs.h b/include/asm-generic/cputime_nsecs.h
new file mode 100644
index 000000000000..b6485cafb7bd
--- /dev/null
+++ b/include/asm-generic/cputime_nsecs.h
@@ -0,0 +1,104 @@
+/*
+ * Definitions for measuring cputime in nsecs resolution.
+ *
+ * Based on <arch/ia64/include/asm/cputime.h>
+ *
+ * Copyright (C) 2007 FUJITSU LIMITED
+ * Copyright (C) 2007 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#ifndef _ASM_GENERIC_CPUTIME_NSECS_H
+#define _ASM_GENERIC_CPUTIME_NSECS_H
+
+typedef u64 __nocast cputime_t;
+typedef u64 __nocast cputime64_t;
+
+#define cputime_one_jiffy jiffies_to_cputime(1)
+
+/*
+ * Convert cputime <-> jiffies (HZ)
+ */
+#define cputime_to_jiffies(__ct) \
+ ((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
+#define cputime_to_scaled(__ct) (__ct)
+#define jiffies_to_cputime(__jif) \
+ (__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ))
+#define cputime64_to_jiffies64(__ct) \
+ ((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
+#define jiffies64_to_cputime64(__jif) \
+ (__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ))
+
+
+/*
+ * Convert cputime <-> nanoseconds
+ */
+#define nsecs_to_cputime(__nsecs) ((__force u64)(__nsecs))
+
+
+/*
+ * Convert cputime <-> microseconds
+ */
+#define cputime_to_usecs(__ct) \
+ ((__force u64)(__ct) / NSEC_PER_USEC)
+#define usecs_to_cputime(__usecs) \
+ (__force cputime_t)((__usecs) * NSEC_PER_USEC)
+#define usecs_to_cputime64(__usecs) \
+ (__force cputime64_t)((__usecs) * NSEC_PER_USEC)
+
+/*
+ * Convert cputime <-> seconds
+ */
+#define cputime_to_secs(__ct) \
+ ((__force u64)(__ct) / NSEC_PER_SEC)
+#define secs_to_cputime(__secs) \
+ (__force cputime_t)((__secs) * NSEC_PER_SEC)
+
+/*
+ * Convert cputime <-> timespec (nsec)
+ */
+static inline cputime_t timespec_to_cputime(const struct timespec *val)
+{
+ u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_nsec;
+ return (__force cputime_t) ret;
+}
+static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val)
+{
+ val->tv_sec = (__force u64) ct / NSEC_PER_SEC;
+ val->tv_nsec = (__force u64) ct % NSEC_PER_SEC;
+}
+
+/*
+ * Convert cputime <-> timeval (msec)
+ */
+static inline cputime_t timeval_to_cputime(struct timeval *val)
+{
+ u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_usec * NSEC_PER_USEC;
+ return (__force cputime_t) ret;
+}
+static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val)
+{
+ val->tv_sec = (__force u64) ct / NSEC_PER_SEC;
+ val->tv_usec = ((__force u64) ct % NSEC_PER_SEC) / NSEC_PER_USEC;
+}
+
+/*
+ * Convert cputime <-> clock (USER_HZ)
+ */
+#define cputime_to_clock_t(__ct) \
+ ((__force u64)(__ct) / (NSEC_PER_SEC / USER_HZ))
+#define clock_t_to_cputime(__x) \
+ (__force cputime_t)((__x) * (NSEC_PER_SEC / USER_HZ))
+
+/*
+ * Convert cputime64 to clock.
+ */
+#define cputime64_to_clock_t(__ct) \
+ cputime_to_clock_t((__force cputime_t)__ct)
+
+#endif
diff --git a/include/linux/aer.h b/include/linux/aer.h
index 544abdb2238c..ec10e1b24c1c 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -49,8 +49,8 @@ static inline int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
}
#endif
-extern void cper_print_aer(const char *prefix, int cper_severity,
- struct aer_capability_regs *aer);
+extern void cper_print_aer(const char *prefix, struct pci_dev *dev,
+ int cper_severity, struct aer_capability_regs *aer);
extern int cper_severity_to_aer(int cper_severity);
extern void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
int severity);
diff --git a/include/linux/async.h b/include/linux/async.h
index 7a24fe9b44b4..a2e3f18b2ad6 100644
--- a/include/linux/async.h
+++ b/include/linux/async.h
@@ -19,8 +19,7 @@ typedef u64 async_cookie_t;
typedef void (async_func_ptr) (void *data, async_cookie_t cookie);
struct async_domain {
struct list_head node;
- struct list_head domain;
- int count;
+ struct list_head pending;
unsigned registered:1;
};
@@ -29,8 +28,7 @@ struct async_domain {
*/
#define ASYNC_DOMAIN(_name) \
struct async_domain _name = { .node = LIST_HEAD_INIT(_name.node), \
- .domain = LIST_HEAD_INIT(_name.domain), \
- .count = 0, \
+ .pending = LIST_HEAD_INIT(_name.pending), \
.registered = 1 }
/*
@@ -39,8 +37,7 @@ struct async_domain {
*/
#define ASYNC_DOMAIN_EXCLUSIVE(_name) \
struct async_domain _name = { .node = LIST_HEAD_INIT(_name.node), \
- .domain = LIST_HEAD_INIT(_name.domain), \
- .count = 0, \
+ .pending = LIST_HEAD_INIT(_name.pending), \
.registered = 0 }
extern async_cookie_t async_schedule(async_func_ptr *ptr, void *data);
@@ -52,4 +49,5 @@ extern void async_synchronize_full_domain(struct async_domain *domain);
extern void async_synchronize_cookie(async_cookie_t cookie);
extern void async_synchronize_cookie_domain(async_cookie_t cookie,
struct async_domain *domain);
+extern bool current_is_async(void);
#endif
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 7d73905dcba2..900af5964f55 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -203,6 +203,7 @@ struct cgroup {
/* For RCU-protected deletion */
struct rcu_head rcu_head;
+ struct work_struct free_work;
/* List of events which userspace want to receive */
struct list_head event_list;
@@ -558,6 +559,7 @@ static inline struct cgroup* task_cgroup(struct task_struct *task,
struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
struct cgroup *cgroup);
+struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos);
/**
* cgroup_for_each_descendant_pre - pre-order walk of a cgroup's descendants
@@ -706,7 +708,6 @@ struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id);
static inline int cgroup_init_early(void) { return 0; }
static inline int cgroup_init(void) { return 0; }
static inline void cgroup_fork(struct task_struct *p) {}
-static inline void cgroup_fork_callbacks(struct task_struct *p) {}
static inline void cgroup_post_fork(struct task_struct *p) {}
static inline void cgroup_exit(struct task_struct *p, int callbacks) {}
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 8a7096fcb01e..66346521cb65 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -161,6 +161,15 @@ clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 minsec)
extern void clockevents_suspend(void);
extern void clockevents_resume(void);
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+#ifdef CONFIG_ARCH_HAS_TICK_BROADCAST
+extern void tick_broadcast(const struct cpumask *mask);
+#else
+#define tick_broadcast NULL
+#endif
+extern int tick_receive_broadcast(void);
+#endif
+
#ifdef CONFIG_GENERIC_CLOCKEVENTS
extern void clockevents_notify(unsigned long reason, void *arg);
#else
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index e24339ccb7f0..b28d161c1091 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -3,12 +3,40 @@
#ifdef CONFIG_CONTEXT_TRACKING
#include <linux/sched.h>
+#include <linux/percpu.h>
+
+struct context_tracking {
+ /*
+ * When active is false, probes are unset in order
+ * to minimize overhead: TIF flags are cleared
+ * and calls to user_enter/exit are ignored. This
+ * may be further optimized using static keys.
+ */
+ bool active;
+ enum {
+ IN_KERNEL = 0,
+ IN_USER,
+ } state;
+};
+
+DECLARE_PER_CPU(struct context_tracking, context_tracking);
+
+static inline bool context_tracking_in_user(void)
+{
+ return __this_cpu_read(context_tracking.state) == IN_USER;
+}
+
+static inline bool context_tracking_active(void)
+{
+ return __this_cpu_read(context_tracking.active);
+}
extern void user_enter(void);
extern void user_exit(void);
extern void context_tracking_task_switch(struct task_struct *prev,
struct task_struct *next);
#else
+static inline bool context_tracking_in_user(void) { return false; }
static inline void user_enter(void) { }
static inline void user_exit(void) { }
static inline void context_tracking_task_switch(struct task_struct *prev,
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index c03af7687bb4..186620631750 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -138,6 +138,7 @@ extern void elv_drain_elevator(struct request_queue *);
/*
* io scheduler registration
*/
+extern void __init load_default_elevator_module(void);
extern int elv_register(struct elevator_type *);
extern void elv_unregister(struct elevator_type *);
@@ -206,5 +207,9 @@ enum {
INIT_LIST_HEAD(&(rq)->csd.list); \
} while (0)
+#else /* CONFIG_BLOCK */
+
+static inline void load_default_elevator_module(void) { }
+
#endif /* CONFIG_BLOCK */
#endif
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 92691d85c320..e5ca8ef50e9b 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -74,7 +74,7 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip,
* SAVE_REGS - The ftrace_ops wants regs saved at each function called
* and passed to the callback. If this flag is set, but the
* architecture does not support passing regs
- * (ARCH_SUPPORTS_FTRACE_SAVE_REGS is not defined), then the
+ * (CONFIG_DYNAMIC_FTRACE_WITH_REGS is not defined), then the
* ftrace_ops will fail to register, unless the next flag
* is set.
* SAVE_REGS_IF_SUPPORTED - This is the same as SAVE_REGS, but if the
@@ -418,7 +418,7 @@ void ftrace_modify_all_code(int command);
#endif
#ifndef FTRACE_REGS_ADDR
-#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
# define FTRACE_REGS_ADDR ((unsigned long)ftrace_regs_caller)
#else
# define FTRACE_REGS_ADDR FTRACE_ADDR
@@ -480,7 +480,7 @@ extern int ftrace_make_nop(struct module *mod,
*/
extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr);
-#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
/**
* ftrace_modify_call - convert from one addr to another (no nop)
* @rec: the mcount call site record
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index a3d489531d83..13a54d0bdfa8 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -49,7 +49,6 @@ struct trace_entry {
unsigned char flags;
unsigned char preempt_count;
int pid;
- int padding;
};
#define FTRACE_MAX_EVENT \
@@ -84,6 +83,9 @@ struct trace_iterator {
long idx;
cpumask_var_t started;
+
+ /* it's true when current open file is snapshot */
+ bool snapshot;
};
enum trace_iter_flags {
@@ -272,7 +274,7 @@ extern int trace_define_field(struct ftrace_event_call *call, const char *type,
extern int trace_add_event_call(struct ftrace_event_call *call);
extern void trace_remove_event_call(struct ftrace_event_call *call);
-#define is_signed_type(type) (((type)(-1)) < 0)
+#define is_signed_type(type) (((type)(-1)) < (type)0)
int trace_set_clr_event(const char *system, const char *event, int set);
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 624ef3f45c8e..29eb805ea4a6 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -153,7 +153,7 @@ extern void rcu_nmi_exit(void);
*/
#define __irq_enter() \
do { \
- vtime_account_irq_enter(current); \
+ account_irq_enter_time(current); \
add_preempt_count(HARDIRQ_OFFSET); \
trace_hardirq_enter(); \
} while (0)
@@ -169,7 +169,7 @@ extern void irq_enter(void);
#define __irq_exit() \
do { \
trace_hardirq_exit(); \
- vtime_account_irq_exit(current); \
+ account_irq_exit_time(current); \
sub_preempt_count(HARDIRQ_OFFSET); \
} while (0)
@@ -180,10 +180,10 @@ extern void irq_exit(void);
#define nmi_enter() \
do { \
+ lockdep_off(); \
ftrace_nmi_enter(); \
BUG_ON(in_nmi()); \
add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \
- lockdep_off(); \
rcu_nmi_enter(); \
trace_hardirq_enter(); \
} while (0)
@@ -192,10 +192,10 @@ extern void irq_exit(void);
do { \
trace_hardirq_exit(); \
rcu_nmi_exit(); \
- lockdep_on(); \
BUG_ON(!in_nmi()); \
sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \
ftrace_nmi_exit(); \
+ lockdep_on(); \
} while (0)
#endif /* LINUX_HARDIRQ_H */
diff --git a/include/linux/init.h b/include/linux/init.h
index 10ed4f436458..861814710d52 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -153,6 +153,7 @@ extern unsigned int reset_devices;
/* used by init/main.c */
void setup_arch(char **);
void prepare_namespace(void);
+void __init load_default_modules(void);
extern void (*late_time_init)(void);
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 6d087c5f57f7..5cd0f0949927 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -10,7 +10,9 @@
#include <linux/pid_namespace.h>
#include <linux/user_namespace.h>
#include <linux/securebits.h>
+#include <linux/seqlock.h>
#include <net/net_namespace.h>
+#include <linux/sched/rt.h>
#ifdef CONFIG_SMP
# define INIT_PUSHABLE_TASKS(tsk) \
@@ -141,6 +143,15 @@ extern struct task_group root_task_group;
# define INIT_PERF_EVENTS(tsk)
#endif
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+# define INIT_VTIME(tsk) \
+ .vtime_seqlock = __SEQLOCK_UNLOCKED(tsk.vtime_seqlock), \
+ .vtime_snap = 0, \
+ .vtime_snap_whence = VTIME_SYS,
+#else
+# define INIT_VTIME(tsk)
+#endif
+
#define INIT_TASK_COMM "swapper"
/*
@@ -210,6 +221,7 @@ extern struct task_group root_task_group;
INIT_TRACE_RECURSION \
INIT_TASK_RCU_PREEMPT(tsk) \
INIT_CPUSET_SEQ \
+ INIT_VTIME(tsk) \
}
diff --git a/include/linux/irq.h b/include/linux/irq.h
index fdf2c4a238cc..bc4e06611958 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -509,8 +509,11 @@ static inline void irq_set_percpu_devid_flags(unsigned int irq)
/* Handle dynamic irq creation and destruction */
extern unsigned int create_irq_nr(unsigned int irq_want, int node);
+extern unsigned int __create_irqs(unsigned int from, unsigned int count,
+ int node);
extern int create_irq(void);
extern void destroy_irq(unsigned int irq);
+extern void destroy_irqs(unsigned int irq, unsigned int count);
/*
* Dynamic irq helper functions. Obsolete. Use irq_alloc_desc* and
@@ -528,6 +531,8 @@ extern int irq_set_handler_data(unsigned int irq, void *data);
extern int irq_set_chip_data(unsigned int irq, void *data);
extern int irq_set_irq_type(unsigned int irq, unsigned int type);
extern int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry);
+extern int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset,
+ struct msi_desc *entry);
extern struct irq_data *irq_get_irq_data(unsigned int irq);
static inline struct irq_chip *irq_get_chip(unsigned int irq)
@@ -590,6 +595,9 @@ int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
#define irq_alloc_desc_from(from, node) \
irq_alloc_descs(-1, from, 1, node)
+#define irq_alloc_descs_from(from, cnt, node) \
+ irq_alloc_descs(-1, from, cnt, node)
+
void irq_free_descs(unsigned int irq, unsigned int cnt);
int irq_reserve_irqs(unsigned int from, unsigned int cnt);
diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
index 6a9e8f5399e2..f5dbce50466e 100644
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -3,6 +3,20 @@
#include <linux/llist.h>
+/*
+ * An entry can be in one of four states:
+ *
+ * free NULL, 0 -> {claimed} : free to be used
+ * claimed NULL, 3 -> {pending} : claimed to be enqueued
+ * pending next, 3 -> {busy} : queued, pending callback
+ * busy NULL, 2 -> {free, claimed} : callback in progress, can be claimed
+ */
+
+#define IRQ_WORK_PENDING 1UL
+#define IRQ_WORK_BUSY 2UL
+#define IRQ_WORK_FLAGS 3UL
+#define IRQ_WORK_LAZY 4UL /* Doesn't want IPI, wait for tick */
+
struct irq_work {
unsigned long flags;
struct llist_node llnode;
@@ -16,8 +30,14 @@ void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *))
work->func = func;
}
-bool irq_work_queue(struct irq_work *work);
+void irq_work_queue(struct irq_work *work);
void irq_work_run(void);
void irq_work_sync(struct irq_work *work);
+#ifdef CONFIG_IRQ_WORK
+bool irq_work_needs_cpu(void);
+#else
+static bool irq_work_needs_cpu(void) { return false; }
+#endif
+
#endif /* _LINUX_IRQ_WORK_H */
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 66b70780e910..ed5f6ed6eb77 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -127,7 +127,7 @@ extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t)
extern void account_steal_time(cputime_t);
extern void account_idle_time(cputime_t);
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
static inline void account_process_tick(struct task_struct *tsk, int user)
{
vtime_account_user(tsk);
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 23755ba42abc..4b6ef4d33cc2 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -49,16 +49,6 @@
#define KPROBE_REENTER 0x00000004
#define KPROBE_HIT_SSDONE 0x00000008
-/*
- * If function tracer is enabled and the arch supports full
- * passing of pt_regs to function tracing, then kprobes can
- * optimize on top of function tracing.
- */
-#if defined(CONFIG_FUNCTION_TRACER) && defined(ARCH_SUPPORTS_FTRACE_SAVE_REGS) \
- && defined(ARCH_SUPPORTS_KPROBES_ON_FTRACE)
-# define KPROBES_CAN_USE_FTRACE
-#endif
-
/* Attach to insert probes on any functions which should be ignored*/
#define __kprobes __attribute__((__section__(".kprobes.text")))
@@ -316,7 +306,7 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table,
#endif
#endif /* CONFIG_OPTPROBES */
-#ifdef KPROBES_CAN_USE_FTRACE
+#ifdef CONFIG_KPROBES_ON_FTRACE
extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *ops, struct pt_regs *regs);
extern int arch_prepare_kprobe_ftrace(struct kprobe *p);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 2c497ab0d03d..b7996a768eb2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -22,6 +22,7 @@
#include <linux/rcupdate.h>
#include <linux/ratelimit.h>
#include <linux/err.h>
+#include <linux/irqflags.h>
#include <asm/signal.h>
#include <linux/kvm.h>
@@ -740,15 +741,52 @@ static inline int kvm_deassign_device(struct kvm *kvm,
}
#endif /* CONFIG_IOMMU_API */
-static inline void kvm_guest_enter(void)
+static inline void __guest_enter(void)
{
- BUG_ON(preemptible());
/*
* This is running in ioctl context so we can avoid
* the call to vtime_account() with its unnecessary idle check.
*/
- vtime_account_system_irqsafe(current);
+ vtime_account_system(current);
current->flags |= PF_VCPU;
+}
+
+static inline void __guest_exit(void)
+{
+ /*
+ * This is running in ioctl context so we can avoid
+ * the call to vtime_account() with its unnecessary idle check.
+ */
+ vtime_account_system(current);
+ current->flags &= ~PF_VCPU;
+}
+
+#ifdef CONFIG_CONTEXT_TRACKING
+extern void guest_enter(void);
+extern void guest_exit(void);
+
+#else /* !CONFIG_CONTEXT_TRACKING */
+static inline void guest_enter(void)
+{
+ __guest_enter();
+}
+
+static inline void guest_exit(void)
+{
+ __guest_exit();
+}
+#endif /* !CONFIG_CONTEXT_TRACKING */
+
+static inline void kvm_guest_enter(void)
+{
+ unsigned long flags;
+
+ BUG_ON(preemptible());
+
+ local_irq_save(flags);
+ guest_enter();
+ local_irq_restore(flags);
+
/* KVM does not hold any references to rcu protected data when it
* switches CPU into a guest mode. In fact switching to a guest mode
* is very similar to exiting to userspase from rcu point of view. In
@@ -761,12 +799,11 @@ static inline void kvm_guest_enter(void)
static inline void kvm_guest_exit(void)
{
- /*
- * This is running in ioctl context so we can avoid
- * the call to vtime_account() with its unnecessary idle check.
- */
- vtime_account_system_irqsafe(current);
- current->flags &= ~PF_VCPU;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ guest_exit();
+ local_irq_restore(flags);
}
/*
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 15472d691ee6..6fa4dd2a3b9e 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1101,6 +1101,12 @@ static inline int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec)
return -1;
}
+static inline int
+pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *maxvec)
+{
+ return -1;
+}
+
static inline void pci_msi_shutdown(struct pci_dev *dev)
{ }
static inline void pci_disable_msi(struct pci_dev *dev)
@@ -1132,6 +1138,7 @@ static inline int pci_msi_enabled(void)
}
#else
extern int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec);
+extern int pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *maxvec);
extern void pci_msi_shutdown(struct pci_dev *dev);
extern void pci_disable_msi(struct pci_dev *dev);
extern int pci_msix_table_size(struct pci_dev *dev);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 6bfb2faa0b19..e47ee462c2f2 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -135,16 +135,21 @@ struct hw_perf_event {
struct { /* software */
struct hrtimer hrtimer;
};
+ struct { /* tracepoint */
+ struct task_struct *tp_target;
+ /* for tp_event->class */
+ struct list_head tp_list;
+ };
#ifdef CONFIG_HAVE_HW_BREAKPOINT
struct { /* breakpoint */
- struct arch_hw_breakpoint info;
- struct list_head bp_list;
/*
* Crufty hack to avoid the chicken and egg
* problem hw_breakpoint has with context
* creation and event initalization.
*/
struct task_struct *bp_target;
+ struct arch_hw_breakpoint info;
+ struct list_head bp_list;
};
#endif
};
@@ -817,6 +822,17 @@ do { \
} while (0)
+struct perf_pmu_events_attr {
+ struct device_attribute attr;
+ u64 id;
+};
+
+#define PMU_EVENT_ATTR(_name, _var, _id, _show) \
+static struct perf_pmu_events_attr _var = { \
+ .attr = __ATTR(_name, 0444, _show, NULL), \
+ .id = _id, \
+};
+
#define PMU_FORMAT_ATTR(_name, _format) \
static ssize_t \
_name##_show(struct device *dev, \
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 9afc01e5a0a6..86c4b6294713 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -98,9 +98,6 @@ int no_printk(const char *fmt, ...)
extern asmlinkage __printf(1, 2)
void early_printk(const char *fmt, ...);
-extern int printk_needs_cpu(int cpu);
-extern void printk_tick(void);
-
#ifdef CONFIG_PRINTK
asmlinkage __printf(5, 0)
int vprintk_emit(int facility, int level,
diff --git a/include/linux/profile.h b/include/linux/profile.h
index a0fc32279fc0..21123902366d 100644
--- a/include/linux/profile.h
+++ b/include/linux/profile.h
@@ -82,9 +82,6 @@ int task_handoff_unregister(struct notifier_block * n);
int profile_event_register(enum profile_type, struct notifier_block * n);
int profile_event_unregister(enum profile_type, struct notifier_block * n);
-int register_timer_hook(int (*hook)(struct pt_regs *));
-void unregister_timer_hook(int (*hook)(struct pt_regs *));
-
struct pt_regs;
#else
@@ -135,16 +132,6 @@ static inline int profile_event_unregister(enum profile_type t, struct notifier_
#define profile_handoff_task(a) (0)
#define profile_munmap(a) do { } while (0)
-static inline int register_timer_hook(int (*hook)(struct pt_regs *))
-{
- return -ENOSYS;
-}
-
-static inline void unregister_timer_hook(int (*hook)(struct pt_regs *))
-{
- return;
-}
-
#endif /* CONFIG_PROFILING */
#endif /* _LINUX_PROFILE_H */
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 275aa3f1062d..b758ce17b309 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -53,7 +53,10 @@ extern int rcutorture_runnable; /* for sysctl */
extern void rcutorture_record_test_transition(void);
extern void rcutorture_record_progress(unsigned long vernum);
extern void do_trace_rcu_torture_read(char *rcutorturename,
- struct rcu_head *rhp);
+ struct rcu_head *rhp,
+ unsigned long secs,
+ unsigned long c_old,
+ unsigned long c);
#else
static inline void rcutorture_record_test_transition(void)
{
@@ -63,9 +66,13 @@ static inline void rcutorture_record_progress(unsigned long vernum)
}
#ifdef CONFIG_RCU_TRACE
extern void do_trace_rcu_torture_read(char *rcutorturename,
- struct rcu_head *rhp);
+ struct rcu_head *rhp,
+ unsigned long secs,
+ unsigned long c_old,
+ unsigned long c);
#else
-#define do_trace_rcu_torture_read(rcutorturename, rhp) do { } while (0)
+#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
+ do { } while (0)
#endif
#endif
@@ -749,7 +756,7 @@ static inline void rcu_preempt_sleep_check(void)
* preemptible RCU implementations (TREE_PREEMPT_RCU and TINY_PREEMPT_RCU)
* in CONFIG_PREEMPT kernel builds, RCU read-side critical sections may
* be preempted, but explicit blocking is illegal. Finally, in preemptible
- * RCU implementations in real-time (CONFIG_PREEMPT_RT) kernel builds,
+ * RCU implementations in real-time (with -rt patchset) kernel builds,
* RCU read-side critical sections may be preempted and they may also
* block, but only when acquiring spinlocks that are subject to priority
* inheritance.
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 519777e3fa01..1342e69542f3 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -167,6 +167,7 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu);
unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu);
unsigned long ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu);
unsigned long ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu);
+unsigned long ring_buffer_read_events_cpu(struct ring_buffer *buffer, int cpu);
u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu);
void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index 9531845c419f..11d05f9fe8b6 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -138,6 +138,7 @@ extern void rtc_device_unregister(struct rtc_device *rtc);
extern int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm);
extern int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm);
extern int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs);
+extern int rtc_set_ntp_time(struct timespec now);
int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm);
extern int rtc_read_alarm(struct rtc_device *rtc,
struct rtc_wkalrm *alrm);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d2112477ff5e..e4112aad2964 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -304,19 +304,6 @@ static inline void lockup_detector_init(void)
}
#endif
-#ifdef CONFIG_DETECT_HUNG_TASK
-extern unsigned int sysctl_hung_task_panic;
-extern unsigned long sysctl_hung_task_check_count;
-extern unsigned long sysctl_hung_task_timeout_secs;
-extern unsigned long sysctl_hung_task_warnings;
-extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
- void __user *buffer,
- size_t *lenp, loff_t *ppos);
-#else
-/* Avoid need for ifdefs elsewhere in the code */
-enum { sysctl_hung_task_timeout_secs = 0 };
-#endif
-
/* Attach to any functions which should be ignored in wchan output. */
#define __sched __attribute__((__section__(".sched.text")))
@@ -338,23 +325,6 @@ extern int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner);
struct nsproxy;
struct user_namespace;
-/*
- * Default maximum number of active map areas, this limits the number of vmas
- * per mm struct. Users can overwrite this number by sysctl but there is a
- * problem.
- *
- * When a program's coredump is generated as ELF format, a section is created
- * per a vma. In ELF, the number of sections is represented in unsigned short.
- * This means the number of sections should be smaller than 65535 at coredump.
- * Because the kernel adds some informative sections to a image of program at
- * generating coredump, we need some margin. The number of extra sections is
- * 1-3 now and depends on arch. We use "5" as safe margin, here.
- */
-#define MAPCOUNT_ELF_CORE_MARGIN (5)
-#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
-
-extern int sysctl_max_map_count;
-
#include <linux/aio.h>
#ifdef CONFIG_MMU
@@ -1194,6 +1164,7 @@ struct sched_entity {
/* rq "owned" by this entity/group: */
struct cfs_rq *my_q;
#endif
+
/*
* Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be
* removed when useful for applications beyond shares distribution (e.g.
@@ -1208,6 +1179,7 @@ struct sched_entity {
struct sched_rt_entity {
struct list_head run_list;
unsigned long timeout;
+ unsigned long watchdog_stamp;
unsigned int time_slice;
struct sched_rt_entity *back;
@@ -1220,11 +1192,6 @@ struct sched_rt_entity {
#endif
};
-/*
- * default timeslice is 100 msecs (used only for SCHED_RR tasks).
- * Timeslices get refilled after they expire.
- */
-#define RR_TIMESLICE (100 * HZ / 1000)
struct rcu_node;
@@ -1368,6 +1335,15 @@ struct task_struct {
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
struct cputime prev_cputime;
#endif
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+ seqlock_t vtime_seqlock;
+ unsigned long long vtime_snap;
+ enum {
+ VTIME_SLEEPING = 0,
+ VTIME_USER,
+ VTIME_SYS,
+ } vtime_snap_whence;
+#endif
unsigned long nvcsw, nivcsw; /* context switch counts */
struct timespec start_time; /* monotonic time */
struct timespec real_start_time; /* boot based time */
@@ -1622,37 +1598,6 @@ static inline void set_numabalancing_state(bool enabled)
}
#endif
-/*
- * Priority of a process goes from 0..MAX_PRIO-1, valid RT
- * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
- * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
- * values are inverted: lower p->prio value means higher priority.
- *
- * The MAX_USER_RT_PRIO value allows the actual maximum
- * RT priority to be separate from the value exported to
- * user-space. This allows kernel threads to set their
- * priority to a value higher than any user task. Note:
- * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
- */
-
-#define MAX_USER_RT_PRIO 100
-#define MAX_RT_PRIO MAX_USER_RT_PRIO
-
-#define MAX_PRIO (MAX_RT_PRIO + 40)
-#define DEFAULT_PRIO (MAX_RT_PRIO + 20)
-
-static inline int rt_prio(int prio)
-{
- if (unlikely(prio < MAX_RT_PRIO))
- return 1;
- return 0;
-}
-
-static inline int rt_task(struct task_struct *p)
-{
- return rt_prio(p->prio);
-}
-
static inline struct pid *task_pid(struct task_struct *task)
{
return task->pids[PIDTYPE_PID].pid;
@@ -1792,6 +1737,37 @@ static inline void put_task_struct(struct task_struct *t)
__put_task_struct(t);
}
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+extern void task_cputime(struct task_struct *t,
+ cputime_t *utime, cputime_t *stime);
+extern void task_cputime_scaled(struct task_struct *t,
+ cputime_t *utimescaled, cputime_t *stimescaled);
+extern cputime_t task_gtime(struct task_struct *t);
+#else
+static inline void task_cputime(struct task_struct *t,
+ cputime_t *utime, cputime_t *stime)
+{
+ if (utime)
+ *utime = t->utime;
+ if (stime)
+ *stime = t->stime;
+}
+
+static inline void task_cputime_scaled(struct task_struct *t,
+ cputime_t *utimescaled,
+ cputime_t *stimescaled)
+{
+ if (utimescaled)
+ *utimescaled = t->utimescaled;
+ if (stimescaled)
+ *stimescaled = t->stimescaled;
+}
+
+static inline cputime_t task_gtime(struct task_struct *t)
+{
+ return t->gtime;
+}
+#endif
extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
@@ -2033,58 +2009,7 @@ extern void wake_up_idle_cpu(int cpu);
static inline void wake_up_idle_cpu(int cpu) { }
#endif
-extern unsigned int sysctl_sched_latency;
-extern unsigned int sysctl_sched_min_granularity;
-extern unsigned int sysctl_sched_wakeup_granularity;
-extern unsigned int sysctl_sched_child_runs_first;
-
-enum sched_tunable_scaling {
- SCHED_TUNABLESCALING_NONE,
- SCHED_TUNABLESCALING_LOG,
- SCHED_TUNABLESCALING_LINEAR,
- SCHED_TUNABLESCALING_END,
-};
-extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
-
-extern unsigned int sysctl_numa_balancing_scan_delay;
-extern unsigned int sysctl_numa_balancing_scan_period_min;
-extern unsigned int sysctl_numa_balancing_scan_period_max;
-extern unsigned int sysctl_numa_balancing_scan_period_reset;
-extern unsigned int sysctl_numa_balancing_scan_size;
-extern unsigned int sysctl_numa_balancing_settle_count;
-
-#ifdef CONFIG_SCHED_DEBUG
-extern unsigned int sysctl_sched_migration_cost;
-extern unsigned int sysctl_sched_nr_migrate;
-extern unsigned int sysctl_sched_time_avg;
-extern unsigned int sysctl_timer_migration;
-extern unsigned int sysctl_sched_shares_window;
-
-int sched_proc_update_handler(struct ctl_table *table, int write,
- void __user *buffer, size_t *length,
- loff_t *ppos);
-#endif
-#ifdef CONFIG_SCHED_DEBUG
-static inline unsigned int get_sysctl_timer_migration(void)
-{
- return sysctl_timer_migration;
-}
-#else
-static inline unsigned int get_sysctl_timer_migration(void)
-{
- return 1;
-}
-#endif
-extern unsigned int sysctl_sched_rt_period;
-extern int sysctl_sched_rt_runtime;
-
-int sched_rt_handler(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp,
- loff_t *ppos);
-
#ifdef CONFIG_SCHED_AUTOGROUP
-extern unsigned int sysctl_sched_autogroup_enabled;
-
extern void sched_autogroup_create_attach(struct task_struct *p);
extern void sched_autogroup_detach(struct task_struct *p);
extern void sched_autogroup_fork(struct signal_struct *sig);
@@ -2100,30 +2025,6 @@ static inline void sched_autogroup_fork(struct signal_struct *sig) { }
static inline void sched_autogroup_exit(struct signal_struct *sig) { }
#endif
-#ifdef CONFIG_CFS_BANDWIDTH
-extern unsigned int sysctl_sched_cfs_bandwidth_slice;
-#endif
-
-#ifdef CONFIG_RT_MUTEXES
-extern int rt_mutex_getprio(struct task_struct *p);
-extern void rt_mutex_setprio(struct task_struct *p, int prio);
-extern void rt_mutex_adjust_pi(struct task_struct *p);
-static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
-{
- return tsk->pi_blocked_on != NULL;
-}
-#else
-static inline int rt_mutex_getprio(struct task_struct *p)
-{
- return p->normal_prio;
-}
-# define rt_mutex_adjust_pi(p) do { } while (0)
-static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
-{
- return false;
-}
-#endif
-
extern bool yield_to(struct task_struct *p, bool preempt);
extern void set_user_nice(struct task_struct *p, long nice);
extern int task_prio(const struct task_struct *p);
@@ -2753,14 +2654,15 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
-extern void normalize_rt_tasks(void);
-
#ifdef CONFIG_CGROUP_SCHED
extern struct task_group root_task_group;
extern struct task_group *sched_create_group(struct task_group *parent);
+extern void sched_online_group(struct task_group *tg,
+ struct task_group *parent);
extern void sched_destroy_group(struct task_group *tg);
+extern void sched_offline_group(struct task_group *tg);
extern void sched_move_task(struct task_struct *tsk);
#ifdef CONFIG_FAIR_GROUP_SCHED
extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h
new file mode 100644
index 000000000000..94e19ea28fc3
--- /dev/null
+++ b/include/linux/sched/rt.h
@@ -0,0 +1,58 @@
+#ifndef _SCHED_RT_H
+#define _SCHED_RT_H
+
+/*
+ * Priority of a process goes from 0..MAX_PRIO-1, valid RT
+ * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
+ * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
+ * values are inverted: lower p->prio value means higher priority.
+ *
+ * The MAX_USER_RT_PRIO value allows the actual maximum
+ * RT priority to be separate from the value exported to
+ * user-space. This allows kernel threads to set their
+ * priority to a value higher than any user task. Note:
+ * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
+ */
+
+#define MAX_USER_RT_PRIO 100
+#define MAX_RT_PRIO MAX_USER_RT_PRIO
+
+#define MAX_PRIO (MAX_RT_PRIO + 40)
+#define DEFAULT_PRIO (MAX_RT_PRIO + 20)
+
+static inline int rt_prio(int prio)
+{
+ if (unlikely(prio < MAX_RT_PRIO))
+ return 1;
+ return 0;
+}
+
+static inline int rt_task(struct task_struct *p)
+{
+ return rt_prio(p->prio);
+}
+
+#ifdef CONFIG_RT_MUTEXES
+extern int rt_mutex_getprio(struct task_struct *p);
+extern void rt_mutex_setprio(struct task_struct *p, int prio);
+extern void rt_mutex_adjust_pi(struct task_struct *p);
+static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
+{
+ return tsk->pi_blocked_on != NULL;
+}
+#else
+static inline int rt_mutex_getprio(struct task_struct *p)
+{
+ return p->normal_prio;
+}
+# define rt_mutex_adjust_pi(p) do { } while (0)
+static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
+{
+ return false;
+}
+#endif
+
+extern void normalize_rt_tasks(void);
+
+
+#endif /* _SCHED_RT_H */
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
new file mode 100644
index 000000000000..d2bb0ae979d0
--- /dev/null
+++ b/include/linux/sched/sysctl.h
@@ -0,0 +1,110 @@
+#ifndef _SCHED_SYSCTL_H
+#define _SCHED_SYSCTL_H
+
+#ifdef CONFIG_DETECT_HUNG_TASK
+extern unsigned int sysctl_hung_task_panic;
+extern unsigned long sysctl_hung_task_check_count;
+extern unsigned long sysctl_hung_task_timeout_secs;
+extern unsigned long sysctl_hung_task_warnings;
+extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
+ void __user *buffer,
+ size_t *lenp, loff_t *ppos);
+#else
+/* Avoid need for ifdefs elsewhere in the code */
+enum { sysctl_hung_task_timeout_secs = 0 };
+#endif
+
+/*
+ * Default maximum number of active map areas, this limits the number of vmas
+ * per mm struct. Users can overwrite this number by sysctl but there is a
+ * problem.
+ *
+ * When a program's coredump is generated as ELF format, a section is created
+ * per a vma. In ELF, the number of sections is represented in unsigned short.
+ * This means the number of sections should be smaller than 65535 at coredump.
+ * Because the kernel adds some informative sections to a image of program at
+ * generating coredump, we need some margin. The number of extra sections is
+ * 1-3 now and depends on arch. We use "5" as safe margin, here.
+ */
+#define MAPCOUNT_ELF_CORE_MARGIN (5)
+#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
+
+extern int sysctl_max_map_count;
+
+extern unsigned int sysctl_sched_latency;
+extern unsigned int sysctl_sched_min_granularity;
+extern unsigned int sysctl_sched_wakeup_granularity;
+extern unsigned int sysctl_sched_child_runs_first;
+
+enum sched_tunable_scaling {
+ SCHED_TUNABLESCALING_NONE,
+ SCHED_TUNABLESCALING_LOG,
+ SCHED_TUNABLESCALING_LINEAR,
+ SCHED_TUNABLESCALING_END,
+};
+extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
+
+extern unsigned int sysctl_numa_balancing_scan_delay;
+extern unsigned int sysctl_numa_balancing_scan_period_min;
+extern unsigned int sysctl_numa_balancing_scan_period_max;
+extern unsigned int sysctl_numa_balancing_scan_period_reset;
+extern unsigned int sysctl_numa_balancing_scan_size;
+extern unsigned int sysctl_numa_balancing_settle_count;
+
+#ifdef CONFIG_SCHED_DEBUG
+extern unsigned int sysctl_sched_migration_cost;
+extern unsigned int sysctl_sched_nr_migrate;
+extern unsigned int sysctl_sched_time_avg;
+extern unsigned int sysctl_timer_migration;
+extern unsigned int sysctl_sched_shares_window;
+
+int sched_proc_update_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *length,
+ loff_t *ppos);
+#endif
+#ifdef CONFIG_SCHED_DEBUG
+static inline unsigned int get_sysctl_timer_migration(void)
+{
+ return sysctl_timer_migration;
+}
+#else
+static inline unsigned int get_sysctl_timer_migration(void)
+{
+ return 1;
+}
+#endif
+
+/*
+ * control realtime throttling:
+ *
+ * /proc/sys/kernel/sched_rt_period_us
+ * /proc/sys/kernel/sched_rt_runtime_us
+ */
+extern unsigned int sysctl_sched_rt_period;
+extern int sysctl_sched_rt_runtime;
+
+#ifdef CONFIG_CFS_BANDWIDTH
+extern unsigned int sysctl_sched_cfs_bandwidth_slice;
+#endif
+
+#ifdef CONFIG_SCHED_AUTOGROUP
+extern unsigned int sysctl_sched_autogroup_enabled;
+#endif
+
+/*
+ * default timeslice is 100 msecs (used only for SCHED_RR tasks).
+ * Timeslices get refilled after they expire.
+ */
+#define RR_TIMESLICE (100 * HZ / 1000)
+
+extern int sched_rr_timeslice;
+
+extern int sched_rr_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos);
+
+extern int sched_rt_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos);
+
+#endif /* _SCHED_SYSCTL_H */
diff --git a/include/linux/smpboot.h b/include/linux/smpboot.h
index e0106d8581d3..c65dee059913 100644
--- a/include/linux/smpboot.h
+++ b/include/linux/smpboot.h
@@ -14,6 +14,8 @@ struct smpboot_thread_data;
* @thread_should_run: Check whether the thread should run or not. Called with
* preemption disabled.
* @thread_fn: The associated thread function
+ * @create: Optional setup function, called when the thread gets
+ * created (Not called from the thread context)
* @setup: Optional setup function, called when the thread gets
* operational the first time
* @cleanup: Optional cleanup function, called when the thread
@@ -22,6 +24,7 @@ struct smpboot_thread_data;
* parked (cpu offline)
* @unpark: Optional unpark function, called when the thread is
* unparked (cpu online)
+ * @selfparking: Thread is not parked by the park function.
* @thread_comm: The base name of the thread
*/
struct smp_hotplug_thread {
@@ -29,10 +32,12 @@ struct smp_hotplug_thread {
struct list_head list;
int (*thread_should_run)(unsigned int cpu);
void (*thread_fn)(unsigned int cpu);
+ void (*create)(unsigned int cpu);
void (*setup)(unsigned int cpu);
void (*cleanup)(unsigned int cpu, bool online);
void (*park)(unsigned int cpu);
void (*unpark)(unsigned int cpu);
+ bool selfparking;
const char *thread_comm;
};
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 6eb691b08358..04f4121a23ae 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -151,30 +151,14 @@ void srcu_barrier(struct srcu_struct *sp);
* Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
* and while lockdep is disabled.
*
- * Note that if the CPU is in the idle loop from an RCU point of view
- * (ie: that we are in the section between rcu_idle_enter() and
- * rcu_idle_exit()) then srcu_read_lock_held() returns false even if
- * the CPU did an srcu_read_lock(). The reason for this is that RCU
- * ignores CPUs that are in such a section, considering these as in
- * extended quiescent state, so such a CPU is effectively never in an
- * RCU read-side critical section regardless of what RCU primitives it
- * invokes. This state of affairs is required --- we need to keep an
- * RCU-free window in idle where the CPU may possibly enter into low
- * power mode. This way we can notice an extended quiescent state to
- * other CPUs that started a grace period. Otherwise we would delay any
- * grace period as long as we run in the idle task.
- *
- * Similarly, we avoid claiming an SRCU read lock held if the current
- * CPU is offline.
+ * Note that SRCU is based on its own statemachine and it doesn't
+ * relies on normal RCU, it can be called from the CPU which
+ * is in the idle loop from an RCU point of view or offline.
*/
static inline int srcu_read_lock_held(struct srcu_struct *sp)
{
if (!debug_lockdep_rcu_enabled())
return 1;
- if (rcu_is_cpu_idle())
- return 0;
- if (!rcu_lockdep_current_cpu_online())
- return 0;
return lock_is_held(&sp->dep_map);
}
@@ -236,8 +220,6 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
int retval = __srcu_read_lock(sp);
rcu_lock_acquire(&(sp)->dep_map);
- rcu_lockdep_assert(!rcu_is_cpu_idle(),
- "srcu_read_lock() used illegally while idle");
return retval;
}
@@ -251,8 +233,6 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
static inline void srcu_read_unlock(struct srcu_struct *sp, int idx)
__releases(sp)
{
- rcu_lockdep_assert(!rcu_is_cpu_idle(),
- "srcu_read_unlock() used illegally while idle");
rcu_lock_release(&(sp)->dep_map);
__srcu_read_unlock(sp, idx);
}
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 1a6567b48492..553272e6af55 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -8,6 +8,8 @@
#include <linux/clockchips.h>
#include <linux/irqflags.h>
+#include <linux/percpu.h>
+#include <linux/hrtimer.h>
#ifdef CONFIG_GENERIC_CLOCKEVENTS
@@ -122,13 +124,26 @@ static inline int tick_oneshot_mode_active(void) { return 0; }
#endif /* !CONFIG_GENERIC_CLOCKEVENTS */
# ifdef CONFIG_NO_HZ
+DECLARE_PER_CPU(struct tick_sched, tick_cpu_sched);
+
+static inline int tick_nohz_tick_stopped(void)
+{
+ return __this_cpu_read(tick_cpu_sched.tick_stopped);
+}
+
extern void tick_nohz_idle_enter(void);
extern void tick_nohz_idle_exit(void);
extern void tick_nohz_irq_exit(void);
extern ktime_t tick_nohz_get_sleep_length(void);
extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
-# else
+
+# else /* !CONFIG_NO_HZ */
+static inline int tick_nohz_tick_stopped(void)
+{
+ return 0;
+}
+
static inline void tick_nohz_idle_enter(void) { }
static inline void tick_nohz_idle_exit(void) { }
diff --git a/include/linux/time.h b/include/linux/time.h
index 4d358e9d10f1..a3ab6a814a9c 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -115,8 +115,20 @@ static inline bool timespec_valid_strict(const struct timespec *ts)
return true;
}
+extern bool persistent_clock_exist;
+
+#ifdef ALWAYS_USE_PERSISTENT_CLOCK
+#define has_persistent_clock() true
+#else
+static inline bool has_persistent_clock(void)
+{
+ return persistent_clock_exist;
+}
+#endif
+
extern void read_persistent_clock(struct timespec *ts);
extern void read_boot_clock(struct timespec *ts);
+extern int persistent_clock_is_local;
extern int update_persistent_clock(struct timespec now);
void timekeeping_init(void);
extern int timekeeping_suspended;
@@ -158,6 +170,7 @@ extern int do_setitimer(int which, struct itimerval *value,
struct itimerval *ovalue);
extern unsigned int alarm_setitimer(unsigned int seconds);
extern int do_getitimer(int which, struct itimerval *value);
+extern int __getnstimeofday(struct timespec *tv);
extern void getnstimeofday(struct timespec *tv);
extern void getrawmonotonic(struct timespec *ts);
extern void getnstime_raw_and_real(struct timespec *ts_raw,
diff --git a/include/linux/tsacct_kern.h b/include/linux/tsacct_kern.h
index 44893e5ec8f7..3251965bf4cc 100644
--- a/include/linux/tsacct_kern.h
+++ b/include/linux/tsacct_kern.h
@@ -23,12 +23,15 @@ static inline void bacct_add_tsk(struct user_namespace *user_ns,
#ifdef CONFIG_TASK_XACCT
extern void xacct_add_tsk(struct taskstats *stats, struct task_struct *p);
extern void acct_update_integrals(struct task_struct *tsk);
+extern void acct_account_cputime(struct task_struct *tsk);
extern void acct_clear_integrals(struct task_struct *tsk);
#else
static inline void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
{}
static inline void acct_update_integrals(struct task_struct *tsk)
{}
+static inline void acct_account_cputime(struct task_struct *tsk)
+{}
static inline void acct_clear_integrals(struct task_struct *tsk)
{}
#endif /* CONFIG_TASK_XACCT */
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 4f628a6fc5b4..02b83db8e2c5 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -35,13 +35,20 @@ struct inode;
# include <asm/uprobes.h>
#endif
+#define UPROBE_HANDLER_REMOVE 1
+#define UPROBE_HANDLER_MASK 1
+
+enum uprobe_filter_ctx {
+ UPROBE_FILTER_REGISTER,
+ UPROBE_FILTER_UNREGISTER,
+ UPROBE_FILTER_MMAP,
+};
+
struct uprobe_consumer {
int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs);
- /*
- * filter is optional; If a filter exists, handler is run
- * if and only if filter returns true.
- */
- bool (*filter)(struct uprobe_consumer *self, struct task_struct *task);
+ bool (*filter)(struct uprobe_consumer *self,
+ enum uprobe_filter_ctx ctx,
+ struct mm_struct *mm);
struct uprobe_consumer *next;
};
@@ -94,6 +101,7 @@ extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsign
extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
extern bool __weak is_swbp_insn(uprobe_opcode_t *insn);
extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
+extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool);
extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
extern int uprobe_mmap(struct vm_area_struct *vma);
extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end);
@@ -117,6 +125,11 @@ uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
{
return -ENOSYS;
}
+static inline int
+uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool add)
+{
+ return -ENOSYS;
+}
static inline void
uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
{
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index ae30ab58431a..71a5782d8c59 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -6,15 +6,46 @@ struct task_struct;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
extern void vtime_task_switch(struct task_struct *prev);
extern void vtime_account_system(struct task_struct *tsk);
-extern void vtime_account_system_irqsafe(struct task_struct *tsk);
extern void vtime_account_idle(struct task_struct *tsk);
extern void vtime_account_user(struct task_struct *tsk);
-extern void vtime_account(struct task_struct *tsk);
-#else
+extern void vtime_account_irq_enter(struct task_struct *tsk);
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+static inline bool vtime_accounting_enabled(void) { return true; }
+#endif
+
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
+
static inline void vtime_task_switch(struct task_struct *prev) { }
static inline void vtime_account_system(struct task_struct *tsk) { }
-static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { }
-static inline void vtime_account(struct task_struct *tsk) { }
+static inline void vtime_account_user(struct task_struct *tsk) { }
+static inline void vtime_account_irq_enter(struct task_struct *tsk) { }
+static inline bool vtime_accounting_enabled(void) { return false; }
+#endif
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+extern void arch_vtime_task_switch(struct task_struct *tsk);
+extern void vtime_account_irq_exit(struct task_struct *tsk);
+extern bool vtime_accounting_enabled(void);
+extern void vtime_user_enter(struct task_struct *tsk);
+static inline void vtime_user_exit(struct task_struct *tsk)
+{
+ vtime_account_user(tsk);
+}
+extern void vtime_guest_enter(struct task_struct *tsk);
+extern void vtime_guest_exit(struct task_struct *tsk);
+extern void vtime_init_idle(struct task_struct *tsk);
+#else
+static inline void vtime_account_irq_exit(struct task_struct *tsk)
+{
+ /* On hard|softirq exit we always account to hard|softirq cputime */
+ vtime_account_system(tsk);
+}
+static inline void vtime_user_enter(struct task_struct *tsk) { }
+static inline void vtime_user_exit(struct task_struct *tsk) { }
+static inline void vtime_guest_enter(struct task_struct *tsk) { }
+static inline void vtime_guest_exit(struct task_struct *tsk) { }
+static inline void vtime_init_idle(struct task_struct *tsk) { }
#endif
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
@@ -23,25 +54,15 @@ extern void irqtime_account_irq(struct task_struct *tsk);
static inline void irqtime_account_irq(struct task_struct *tsk) { }
#endif
-static inline void vtime_account_irq_enter(struct task_struct *tsk)
+static inline void account_irq_enter_time(struct task_struct *tsk)
{
- /*
- * Hardirq can interrupt idle task anytime. So we need vtime_account()
- * that performs the idle check in CONFIG_VIRT_CPU_ACCOUNTING.
- * Softirq can also interrupt idle task directly if it calls
- * local_bh_enable(). Such case probably don't exist but we never know.
- * Ksoftirqd is not concerned because idle time is flushed on context
- * switch. Softirqs in the end of hardirqs are also not a problem because
- * the idle time is flushed on hardirq time already.
- */
- vtime_account(tsk);
+ vtime_account_irq_enter(tsk);
irqtime_account_irq(tsk);
}
-static inline void vtime_account_irq_exit(struct task_struct *tsk)
+static inline void account_irq_exit_time(struct task_struct *tsk)
{
- /* On hard|softirq exit we always account to hard|softirq cputime */
- vtime_account_system(tsk);
+ vtime_account_irq_exit(tsk);
irqtime_account_irq(tsk);
}
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 2b58905d3504..8afab27cdbc2 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -27,7 +27,7 @@ void delayed_work_timer_fn(unsigned long __data);
enum {
WORK_STRUCT_PENDING_BIT = 0, /* work item is pending execution */
WORK_STRUCT_DELAYED_BIT = 1, /* work item is delayed */
- WORK_STRUCT_CWQ_BIT = 2, /* data points to cwq */
+ WORK_STRUCT_PWQ_BIT = 2, /* data points to pwq */
WORK_STRUCT_LINKED_BIT = 3, /* next work is linked to this one */
#ifdef CONFIG_DEBUG_OBJECTS_WORK
WORK_STRUCT_STATIC_BIT = 4, /* static initializer (debugobjects) */
@@ -40,7 +40,7 @@ enum {
WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT,
WORK_STRUCT_DELAYED = 1 << WORK_STRUCT_DELAYED_BIT,
- WORK_STRUCT_CWQ = 1 << WORK_STRUCT_CWQ_BIT,
+ WORK_STRUCT_PWQ = 1 << WORK_STRUCT_PWQ_BIT,
WORK_STRUCT_LINKED = 1 << WORK_STRUCT_LINKED_BIT,
#ifdef CONFIG_DEBUG_OBJECTS_WORK
WORK_STRUCT_STATIC = 1 << WORK_STRUCT_STATIC_BIT,
@@ -57,29 +57,36 @@ enum {
/* special cpu IDs */
WORK_CPU_UNBOUND = NR_CPUS,
- WORK_CPU_NONE = NR_CPUS + 1,
- WORK_CPU_LAST = WORK_CPU_NONE,
+ WORK_CPU_END = NR_CPUS + 1,
/*
- * Reserve 7 bits off of cwq pointer w/ debugobjects turned
- * off. This makes cwqs aligned to 256 bytes and allows 15
- * workqueue flush colors.
+ * Reserve 7 bits off of pwq pointer w/ debugobjects turned off.
+ * This makes pwqs aligned to 256 bytes and allows 15 workqueue
+ * flush colors.
*/
WORK_STRUCT_FLAG_BITS = WORK_STRUCT_COLOR_SHIFT +
WORK_STRUCT_COLOR_BITS,
- /* data contains off-queue information when !WORK_STRUCT_CWQ */
+ /* data contains off-queue information when !WORK_STRUCT_PWQ */
WORK_OFFQ_FLAG_BASE = WORK_STRUCT_FLAG_BITS,
WORK_OFFQ_CANCELING = (1 << WORK_OFFQ_FLAG_BASE),
+ /*
+ * When a work item is off queue, its high bits point to the last
+ * pool it was on. Cap at 31 bits and use the highest number to
+ * indicate that no pool is associated.
+ */
WORK_OFFQ_FLAG_BITS = 1,
- WORK_OFFQ_CPU_SHIFT = WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS,
+ WORK_OFFQ_POOL_SHIFT = WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS,
+ WORK_OFFQ_LEFT = BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT,
+ WORK_OFFQ_POOL_BITS = WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31,
+ WORK_OFFQ_POOL_NONE = (1LU << WORK_OFFQ_POOL_BITS) - 1,
/* convenience constants */
WORK_STRUCT_FLAG_MASK = (1UL << WORK_STRUCT_FLAG_BITS) - 1,
WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK,
- WORK_STRUCT_NO_CPU = (unsigned long)WORK_CPU_NONE << WORK_OFFQ_CPU_SHIFT,
+ WORK_STRUCT_NO_POOL = (unsigned long)WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT,
/* bit mask for work_busy() return values */
WORK_BUSY_PENDING = 1 << 0,
@@ -95,13 +102,16 @@ struct work_struct {
#endif
};
-#define WORK_DATA_INIT() ATOMIC_LONG_INIT(WORK_STRUCT_NO_CPU)
+#define WORK_DATA_INIT() ATOMIC_LONG_INIT(WORK_STRUCT_NO_POOL)
#define WORK_DATA_STATIC_INIT() \
- ATOMIC_LONG_INIT(WORK_STRUCT_NO_CPU | WORK_STRUCT_STATIC)
+ ATOMIC_LONG_INIT(WORK_STRUCT_NO_POOL | WORK_STRUCT_STATIC)
struct delayed_work {
struct work_struct work;
struct timer_list timer;
+
+ /* target workqueue and CPU ->timer uses to queue ->work */
+ struct workqueue_struct *wq;
int cpu;
};
@@ -426,7 +436,6 @@ extern bool cancel_delayed_work_sync(struct delayed_work *dwork);
extern void workqueue_set_max_active(struct workqueue_struct *wq,
int max_active);
extern bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq);
-extern unsigned int work_cpu(struct work_struct *work);
extern unsigned int work_busy(struct work_struct *work);
/*
diff --git a/include/trace/events/ras.h b/include/trace/events/ras.h
new file mode 100644
index 000000000000..88b878383797
--- /dev/null
+++ b/include/trace/events/ras.h
@@ -0,0 +1,77 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM ras
+
+#if !defined(_TRACE_AER_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_AER_H
+
+#include <linux/tracepoint.h>
+#include <linux/edac.h>
+
+
+/*
+ * PCIe AER Trace event
+ *
+ * These events are generated when hardware detects a corrected or
+ * uncorrected event on a PCIe device. The event report has
+ * the following structure:
+ *
+ * char * dev_name - The name of the slot where the device resides
+ * ([domain:]bus:device.function).
+ * u32 status - Either the correctable or uncorrectable register
+ * indicating what error or errors have been seen
+ * u8 severity - error severity 0:NONFATAL 1:FATAL 2:CORRECTED
+ */
+
+#define aer_correctable_errors \
+ {BIT(0), "Receiver Error"}, \
+ {BIT(6), "Bad TLP"}, \
+ {BIT(7), "Bad DLLP"}, \
+ {BIT(8), "RELAY_NUM Rollover"}, \
+ {BIT(12), "Replay Timer Timeout"}, \
+ {BIT(13), "Advisory Non-Fatal"}
+
+#define aer_uncorrectable_errors \
+ {BIT(4), "Data Link Protocol"}, \
+ {BIT(12), "Poisoned TLP"}, \
+ {BIT(13), "Flow Control Protocol"}, \
+ {BIT(14), "Completion Timeout"}, \
+ {BIT(15), "Completer Abort"}, \
+ {BIT(16), "Unexpected Completion"}, \
+ {BIT(17), "Receiver Overflow"}, \
+ {BIT(18), "Malformed TLP"}, \
+ {BIT(19), "ECRC"}, \
+ {BIT(20), "Unsupported Request"}
+
+TRACE_EVENT(aer_event,
+ TP_PROTO(const char *dev_name,
+ const u32 status,
+ const u8 severity),
+
+ TP_ARGS(dev_name, status, severity),
+
+ TP_STRUCT__entry(
+ __string( dev_name, dev_name )
+ __field( u32, status )
+ __field( u8, severity )
+ ),
+
+ TP_fast_assign(
+ __assign_str(dev_name, dev_name);
+ __entry->status = status;
+ __entry->severity = severity;
+ ),
+
+ TP_printk("%s PCIe Bus Error: severity=%s, %s\n",
+ __get_str(dev_name),
+ __entry->severity == HW_EVENT_ERR_CORRECTED ? "Corrected" :
+ __entry->severity == HW_EVENT_ERR_FATAL ?
+ "Fatal" : "Uncorrected",
+ __entry->severity == HW_EVENT_ERR_CORRECTED ?
+ __print_flags(__entry->status, "|", aer_correctable_errors) :
+ __print_flags(__entry->status, "|", aer_uncorrectable_errors))
+);
+
+#endif /* _TRACE_AER_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index d4f559b1ec34..1918e832da4f 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -44,8 +44,10 @@ TRACE_EVENT(rcu_utilization,
* of a new grace period or the end of an old grace period ("cpustart"
* and "cpuend", respectively), a CPU passing through a quiescent
* state ("cpuqs"), a CPU coming online or going offline ("cpuonl"
- * and "cpuofl", respectively), and a CPU being kicked for being too
- * long in dyntick-idle mode ("kick").
+ * and "cpuofl", respectively), a CPU being kicked for being too
+ * long in dyntick-idle mode ("kick"), a CPU accelerating its new
+ * callbacks to RCU_NEXT_READY_TAIL ("AccReadyCB"), and a CPU
+ * accelerating its new callbacks to RCU_WAIT_TAIL ("AccWaitCB").
*/
TRACE_EVENT(rcu_grace_period,
@@ -393,7 +395,7 @@ TRACE_EVENT(rcu_kfree_callback,
*/
TRACE_EVENT(rcu_batch_start,
- TP_PROTO(char *rcuname, long qlen_lazy, long qlen, int blimit),
+ TP_PROTO(char *rcuname, long qlen_lazy, long qlen, long blimit),
TP_ARGS(rcuname, qlen_lazy, qlen, blimit),
@@ -401,7 +403,7 @@ TRACE_EVENT(rcu_batch_start,
__field(char *, rcuname)
__field(long, qlen_lazy)
__field(long, qlen)
- __field(int, blimit)
+ __field(long, blimit)
),
TP_fast_assign(
@@ -411,7 +413,7 @@ TRACE_EVENT(rcu_batch_start,
__entry->blimit = blimit;
),
- TP_printk("%s CBs=%ld/%ld bl=%d",
+ TP_printk("%s CBs=%ld/%ld bl=%ld",
__entry->rcuname, __entry->qlen_lazy, __entry->qlen,
__entry->blimit)
);
@@ -523,22 +525,30 @@ TRACE_EVENT(rcu_batch_end,
*/
TRACE_EVENT(rcu_torture_read,
- TP_PROTO(char *rcutorturename, struct rcu_head *rhp),
+ TP_PROTO(char *rcutorturename, struct rcu_head *rhp,
+ unsigned long secs, unsigned long c_old, unsigned long c),
- TP_ARGS(rcutorturename, rhp),
+ TP_ARGS(rcutorturename, rhp, secs, c_old, c),
TP_STRUCT__entry(
__field(char *, rcutorturename)
__field(struct rcu_head *, rhp)
+ __field(unsigned long, secs)
+ __field(unsigned long, c_old)
+ __field(unsigned long, c)
),
TP_fast_assign(
__entry->rcutorturename = rcutorturename;
__entry->rhp = rhp;
+ __entry->secs = secs;
+ __entry->c_old = c_old;
+ __entry->c = c;
),
- TP_printk("%s torture read %p",
- __entry->rcutorturename, __entry->rhp)
+ TP_printk("%s torture read %p %luus c: %lu %lu",
+ __entry->rcutorturename, __entry->rhp,
+ __entry->secs, __entry->c_old, __entry->c)
);
/*
@@ -608,7 +618,8 @@ TRACE_EVENT(rcu_barrier,
#define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0)
#define trace_rcu_batch_end(rcuname, callbacks_invoked, cb, nr, iit, risk) \
do { } while (0)
-#define trace_rcu_torture_read(rcutorturename, rhp) do { } while (0)
+#define trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
+ do { } while (0)
#define trace_rcu_barrier(name, s, cpu, cnt, done) do { } while (0)
#endif /* #else #ifdef CONFIG_RCU_TRACE */
diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h
index f28d1b65f178..bf0e18ba6cfb 100644
--- a/include/trace/events/workqueue.h
+++ b/include/trace/events/workqueue.h
@@ -27,7 +27,7 @@ DECLARE_EVENT_CLASS(workqueue_work,
/**
* workqueue_queue_work - called when a work gets queued
* @req_cpu: the requested cpu
- * @cwq: pointer to struct cpu_workqueue_struct
+ * @pwq: pointer to struct pool_workqueue
* @work: pointer to struct work_struct
*
* This event occurs when a work is queued immediately or once a
@@ -36,10 +36,10 @@ DECLARE_EVENT_CLASS(workqueue_work,
*/
TRACE_EVENT(workqueue_queue_work,
- TP_PROTO(unsigned int req_cpu, struct cpu_workqueue_struct *cwq,
+ TP_PROTO(unsigned int req_cpu, struct pool_workqueue *pwq,
struct work_struct *work),
- TP_ARGS(req_cpu, cwq, work),
+ TP_ARGS(req_cpu, pwq, work),
TP_STRUCT__entry(
__field( void *, work )
@@ -52,9 +52,9 @@ TRACE_EVENT(workqueue_queue_work,
TP_fast_assign(
__entry->work = work;
__entry->function = work->func;
- __entry->workqueue = cwq->wq;
+ __entry->workqueue = pwq->wq;
__entry->req_cpu = req_cpu;
- __entry->cpu = cwq->pool->gcwq->cpu;
+ __entry->cpu = pwq->pool->cpu;
),
TP_printk("work struct=%p function=%pf workqueue=%p req_cpu=%u cpu=%u",
diff --git a/include/uapi/linux/auto_fs.h b/include/uapi/linux/auto_fs.h
index 77cdba9df274..bb991dfe134f 100644
--- a/include/uapi/linux/auto_fs.h
+++ b/include/uapi/linux/auto_fs.h
@@ -28,25 +28,16 @@
#define AUTOFS_MIN_PROTO_VERSION AUTOFS_PROTO_VERSION
/*
- * Architectures where both 32- and 64-bit binaries can be executed
- * on 64-bit kernels need this. This keeps the structure format
- * uniform, and makes sure the wait_queue_token isn't too big to be
- * passed back down to the kernel.
- *
- * This assumes that on these architectures:
- * mode 32 bit 64 bit
- * -------------------------
- * int 32 bit 32 bit
- * long 32 bit 64 bit
- *
- * If so, 32-bit user-space code should be backwards compatible.
+ * The wait_queue_token (autofs_wqt_t) is part of a structure which is passed
+ * back to the kernel via ioctl from userspace. On architectures where 32- and
+ * 64-bit userspace binaries can be executed it's important that the size of
+ * autofs_wqt_t stays constant between 32- and 64-bit Linux kernels so that we
+ * do not break the binary ABI interface by changing the structure size.
*/
-
-#if defined(__sparc__) || defined(__mips__) || defined(__x86_64__) \
- || defined(__powerpc__) || defined(__s390__)
-typedef unsigned int autofs_wqt_t;
-#else
+#if defined(__ia64__) || defined(__alpha__) /* pure 64bit architectures */
typedef unsigned long autofs_wqt_t;
+#else
+typedef unsigned int autofs_wqt_t;
#endif
/* Packet types */
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 4f63c05d27c9..9fa9c622a7f4 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -579,7 +579,8 @@ enum perf_event_type {
* { u32 size;
* char data[size];}&& PERF_SAMPLE_RAW
*
- * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK
+ * { u64 nr;
+ * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK
*
* { u64 abi; # enum perf_sample_regs_abi
* u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER