aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/.gitignore2
-rw-r--r--kernel/Makefile11
-rw-r--r--kernel/async.c53
-rw-r--r--kernel/audit.c267
-rw-r--r--kernel/audit.h81
-rw-r--r--kernel/audit_fsnotify.c2
-rw-r--r--kernel/audit_tree.c19
-rw-r--r--kernel/audit_watch.c2
-rw-r--r--kernel/auditfilter.c6
-rw-r--r--kernel/auditsc.c320
-rw-r--r--kernel/capability.c45
-rw-r--r--kernel/cgroup/cgroup.c27
-rw-r--r--kernel/cgroup/cpuset.c13
-rw-r--r--kernel/cgroup/pids.c4
-rw-r--r--kernel/cgroup/rdma.c5
-rw-r--r--kernel/cgroup/rstat.c10
-rw-r--r--kernel/configs.c42
-rw-r--r--kernel/crash_core.c2
-rw-r--r--kernel/cred.c13
-rw-r--r--kernel/dma/Kconfig124
-rw-r--r--kernel/dma/Makefile2
-rw-r--r--kernel/dma/coherent.c50
-rw-r--r--kernel/dma/debug.c108
-rw-r--r--kernel/dma/direct.c28
-rw-r--r--kernel/dma/mapping.c25
-rw-r--r--kernel/dma/swiotlb.c70
-rw-r--r--kernel/events/core.c8
-rw-r--r--kernel/exit.c1
-rw-r--r--kernel/fork.c3
-rw-r--r--kernel/gcov/gcc_3_4.c6
-rw-r--r--kernel/hung_task.c3
-rw-r--r--kernel/irq/chip.c12
-rw-r--r--kernel/irq/irq_sim.c12
-rw-r--r--kernel/irq/irqdomain.c45
-rw-r--r--kernel/kcov.c15
-rw-r--r--kernel/kthread.c3
-rw-r--r--kernel/livepatch/core.c854
-rw-r--r--kernel/livepatch/core.h11
-rw-r--r--kernel/livepatch/patch.c57
-rw-r--r--kernel/livepatch/patch.h5
-rw-r--r--kernel/livepatch/transition.c124
-rw-r--r--kernel/livepatch/transition.h1
-rw-r--r--kernel/module.c6
-rw-r--r--kernel/panic.c10
-rw-r--r--kernel/power/energy_model.c57
-rw-r--r--kernel/power/qos.c8
-rw-r--r--kernel/power/snapshot.c17
-rw-r--r--kernel/printk/printk.c90
-rw-r--r--kernel/resource.c4
-rw-r--r--kernel/sched/core.c3
-rw-r--r--kernel/sched/fair.c15
-rw-r--r--kernel/seccomp.c4
-rw-r--r--kernel/sys.c11
-rw-r--r--kernel/sys_ni.c6
-rw-r--r--kernel/sysctl.c45
-rw-r--r--kernel/trace/trace_events_filter.c5
-rw-r--r--kernel/trace/trace_kprobe.c2
-rw-r--r--kernel/watchdog.c7
-rw-r--r--kernel/workqueue.c103
59 files changed, 1810 insertions, 1074 deletions
diff --git a/kernel/.gitignore b/kernel/.gitignore
index b3097bde4e9c..6e699100872f 100644
--- a/kernel/.gitignore
+++ b/kernel/.gitignore
@@ -1,7 +1,5 @@
#
# Generated files
#
-config_data.h
-config_data.gz
timeconst.h
hz.bc
diff --git a/kernel/Makefile b/kernel/Makefile
index 6aa7543bcdb2..6c57e78817da 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -116,17 +116,8 @@ obj-$(CONFIG_GCC_PLUGIN_STACKLEAK) += stackleak.o
KASAN_SANITIZE_stackleak.o := n
KCOV_INSTRUMENT_stackleak.o := n
-$(obj)/configs.o: $(obj)/config_data.h
+$(obj)/configs.o: $(obj)/config_data.gz
targets += config_data.gz
$(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE
$(call if_changed,gzip)
-
-filechk_ikconfiggz = \
- echo "static const char kernel_config_data[] __used = MAGIC_START"; \
- cat $< | scripts/bin2c; \
- echo "MAGIC_END;"
-
-targets += config_data.h
-$(obj)/config_data.h: $(obj)/config_data.gz FORCE
- $(call filechk,ikconfiggz)
diff --git a/kernel/async.c b/kernel/async.c
index a893d6170944..f6bd0d9885e1 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -149,7 +149,25 @@ static void async_run_entry_fn(struct work_struct *work)
wake_up(&async_done);
}
-static async_cookie_t __async_schedule(async_func_t func, void *data, struct async_domain *domain)
+/**
+ * async_schedule_node_domain - NUMA specific version of async_schedule_domain
+ * @func: function to execute asynchronously
+ * @data: data pointer to pass to the function
+ * @node: NUMA node that we want to schedule this on or close to
+ * @domain: the domain
+ *
+ * Returns an async_cookie_t that may be used for checkpointing later.
+ * @domain may be used in the async_synchronize_*_domain() functions to
+ * wait within a certain synchronization domain rather than globally.
+ *
+ * Note: This function may be called from atomic or non-atomic contexts.
+ *
+ * The node requested will be honored on a best effort basis. If the node
+ * has no CPUs associated with it then the work is distributed among all
+ * available CPUs.
+ */
+async_cookie_t async_schedule_node_domain(async_func_t func, void *data,
+ int node, struct async_domain *domain)
{
struct async_entry *entry;
unsigned long flags;
@@ -195,43 +213,30 @@ static async_cookie_t __async_schedule(async_func_t func, void *data, struct asy
current->flags |= PF_USED_ASYNC;
/* schedule for execution */
- queue_work(system_unbound_wq, &entry->work);
+ queue_work_node(node, system_unbound_wq, &entry->work);
return newcookie;
}
+EXPORT_SYMBOL_GPL(async_schedule_node_domain);
/**
- * async_schedule - schedule a function for asynchronous execution
+ * async_schedule_node - NUMA specific version of async_schedule
* @func: function to execute asynchronously
* @data: data pointer to pass to the function
+ * @node: NUMA node that we want to schedule this on or close to
*
* Returns an async_cookie_t that may be used for checkpointing later.
* Note: This function may be called from atomic or non-atomic contexts.
- */
-async_cookie_t async_schedule(async_func_t func, void *data)
-{
- return __async_schedule(func, data, &async_dfl_domain);
-}
-EXPORT_SYMBOL_GPL(async_schedule);
-
-/**
- * async_schedule_domain - schedule a function for asynchronous execution within a certain domain
- * @func: function to execute asynchronously
- * @data: data pointer to pass to the function
- * @domain: the domain
*
- * Returns an async_cookie_t that may be used for checkpointing later.
- * @domain may be used in the async_synchronize_*_domain() functions to
- * wait within a certain synchronization domain rather than globally. A
- * synchronization domain is specified via @domain. Note: This function
- * may be called from atomic or non-atomic contexts.
+ * The node requested will be honored on a best effort basis. If the node
+ * has no CPUs associated with it then the work is distributed among all
+ * available CPUs.
*/
-async_cookie_t async_schedule_domain(async_func_t func, void *data,
- struct async_domain *domain)
+async_cookie_t async_schedule_node(async_func_t func, void *data, int node)
{
- return __async_schedule(func, data, domain);
+ return async_schedule_node_domain(func, data, node, &async_dfl_domain);
}
-EXPORT_SYMBOL_GPL(async_schedule_domain);
+EXPORT_SYMBOL_GPL(async_schedule_node);
/**
* async_synchronize_full - synchronize all asynchronous function calls
diff --git a/kernel/audit.c b/kernel/audit.c
index 632d36059556..c89ea48c70a6 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -396,10 +396,10 @@ static int audit_log_config_change(char *function_name, u32 new, u32 old,
struct audit_buffer *ab;
int rc = 0;
- ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
+ ab = audit_log_start(audit_context(), GFP_KERNEL, AUDIT_CONFIG_CHANGE);
if (unlikely(!ab))
return rc;
- audit_log_format(ab, "%s=%u old=%u ", function_name, new, old);
+ audit_log_format(ab, "op=set %s=%u old=%u ", function_name, new, old);
audit_log_session_info(ab);
rc = audit_log_task_context(ab);
if (rc)
@@ -1053,7 +1053,8 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
return err;
}
-static void audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type)
+static void audit_log_common_recv_msg(struct audit_context *context,
+ struct audit_buffer **ab, u16 msg_type)
{
uid_t uid = from_kuid(&init_user_ns, current_uid());
pid_t pid = task_tgid_nr(current);
@@ -1063,7 +1064,7 @@ static void audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type)
return;
}
- *ab = audit_log_start(NULL, GFP_KERNEL, msg_type);
+ *ab = audit_log_start(context, GFP_KERNEL, msg_type);
if (unlikely(!*ab))
return;
audit_log_format(*ab, "pid=%d uid=%u ", pid, uid);
@@ -1071,6 +1072,12 @@ static void audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type)
audit_log_task_context(*ab);
}
+static inline void audit_log_user_recv_msg(struct audit_buffer **ab,
+ u16 msg_type)
+{
+ audit_log_common_recv_msg(NULL, ab, msg_type);
+}
+
int is_audit_feature_set(int i)
{
return af.features & AUDIT_FEATURE_TO_MASK(i);
@@ -1338,7 +1345,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
if (err)
break;
}
- audit_log_common_recv_msg(&ab, msg_type);
+ audit_log_user_recv_msg(&ab, msg_type);
if (msg_type != AUDIT_USER_TTY)
audit_log_format(ab, " msg='%.*s'",
AUDIT_MESSAGE_TEXT_MAX,
@@ -1361,8 +1368,12 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
if (nlmsg_len(nlh) < sizeof(struct audit_rule_data))
return -EINVAL;
if (audit_enabled == AUDIT_LOCKED) {
- audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE);
- audit_log_format(ab, " audit_enabled=%d res=0", audit_enabled);
+ audit_log_common_recv_msg(audit_context(), &ab,
+ AUDIT_CONFIG_CHANGE);
+ audit_log_format(ab, " op=%s audit_enabled=%d res=0",
+ msg_type == AUDIT_ADD_RULE ?
+ "add_rule" : "remove_rule",
+ audit_enabled);
audit_log_end(ab);
return -EPERM;
}
@@ -1373,7 +1384,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
break;
case AUDIT_TRIM:
audit_trim_trees();
- audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE);
+ audit_log_common_recv_msg(audit_context(), &ab,
+ AUDIT_CONFIG_CHANGE);
audit_log_format(ab, " op=trim res=1");
audit_log_end(ab);
break;
@@ -1403,8 +1415,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
/* OK, here comes... */
err = audit_tag_tree(old, new);
- audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE);
-
+ audit_log_common_recv_msg(audit_context(), &ab,
+ AUDIT_CONFIG_CHANGE);
audit_log_format(ab, " op=make_equiv old=");
audit_log_untrustedstring(ab, old);
audit_log_format(ab, " new=");
@@ -1471,7 +1483,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
old.enabled = t & AUDIT_TTY_ENABLE;
old.log_passwd = !!(t & AUDIT_TTY_LOG_PASSWD);
- audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE);
+ audit_log_common_recv_msg(audit_context(), &ab,
+ AUDIT_CONFIG_CHANGE);
audit_log_format(ab, " op=tty_set old-enabled=%d new-enabled=%d"
" old-log_passwd=%d new-log_passwd=%d res=%d",
old.enabled, s.enabled, old.log_passwd,
@@ -2054,153 +2067,6 @@ void audit_log_key(struct audit_buffer *ab, char *key)
audit_log_format(ab, "(null)");
}
-void audit_log_cap(struct audit_buffer *ab, char *prefix, kernel_cap_t *cap)
-{
- int i;
-
- if (cap_isclear(*cap)) {
- audit_log_format(ab, " %s=0", prefix);
- return;
- }
- audit_log_format(ab, " %s=", prefix);
- CAP_FOR_EACH_U32(i)
- audit_log_format(ab, "%08x", cap->cap[CAP_LAST_U32 - i]);
-}
-
-static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name)
-{
- audit_log_cap(ab, "cap_fp", &name->fcap.permitted);
- audit_log_cap(ab, "cap_fi", &name->fcap.inheritable);
- audit_log_format(ab, " cap_fe=%d cap_fver=%x",
- name->fcap.fE, name->fcap_ver);
-}
-
-static inline int audit_copy_fcaps(struct audit_names *name,
- const struct dentry *dentry)
-{
- struct cpu_vfs_cap_data caps;
- int rc;
-
- if (!dentry)
- return 0;
-
- rc = get_vfs_caps_from_disk(dentry, &caps);
- if (rc)
- return rc;
-
- name->fcap.permitted = caps.permitted;
- name->fcap.inheritable = caps.inheritable;
- name->fcap.fE = !!(caps.magic_etc & VFS_CAP_FLAGS_EFFECTIVE);
- name->fcap_ver = (caps.magic_etc & VFS_CAP_REVISION_MASK) >>
- VFS_CAP_REVISION_SHIFT;
-
- return 0;
-}
-
-/* Copy inode data into an audit_names. */
-void audit_copy_inode(struct audit_names *name, const struct dentry *dentry,
- struct inode *inode)
-{
- name->ino = inode->i_ino;
- name->dev = inode->i_sb->s_dev;
- name->mode = inode->i_mode;
- name->uid = inode->i_uid;
- name->gid = inode->i_gid;
- name->rdev = inode->i_rdev;
- security_inode_getsecid(inode, &name->osid);
- audit_copy_fcaps(name, dentry);
-}
-
-/**
- * audit_log_name - produce AUDIT_PATH record from struct audit_names
- * @context: audit_context for the task
- * @n: audit_names structure with reportable details
- * @path: optional path to report instead of audit_names->name
- * @record_num: record number to report when handling a list of names
- * @call_panic: optional pointer to int that will be updated if secid fails
- */
-void audit_log_name(struct audit_context *context, struct audit_names *n,
- const struct path *path, int record_num, int *call_panic)
-{
- struct audit_buffer *ab;
- ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH);
- if (!ab)
- return;
-
- audit_log_format(ab, "item=%d", record_num);
-
- if (path)
- audit_log_d_path(ab, " name=", path);
- else if (n->name) {
- switch (n->name_len) {
- case AUDIT_NAME_FULL:
- /* log the full path */
- audit_log_format(ab, " name=");
- audit_log_untrustedstring(ab, n->name->name);
- break;
- case 0:
- /* name was specified as a relative path and the
- * directory component is the cwd */
- audit_log_d_path(ab, " name=", &context->pwd);
- break;
- default:
- /* log the name's directory component */
- audit_log_format(ab, " name=");
- audit_log_n_untrustedstring(ab, n->name->name,
- n->name_len);
- }
- } else
- audit_log_format(ab, " name=(null)");
-
- if (n->ino != AUDIT_INO_UNSET)
- audit_log_format(ab, " inode=%lu"
- " dev=%02x:%02x mode=%#ho"
- " ouid=%u ogid=%u rdev=%02x:%02x",
- n->ino,
- MAJOR(n->dev),
- MINOR(n->dev),
- n->mode,
- from_kuid(&init_user_ns, n->uid),
- from_kgid(&init_user_ns, n->gid),
- MAJOR(n->rdev),
- MINOR(n->rdev));
- if (n->osid != 0) {
- char *ctx = NULL;
- u32 len;
- if (security_secid_to_secctx(
- n->osid, &ctx, &len)) {
- audit_log_format(ab, " osid=%u", n->osid);
- if (call_panic)
- *call_panic = 2;
- } else {
- audit_log_format(ab, " obj=%s", ctx);
- security_release_secctx(ctx, len);
- }
- }
-
- /* log the audit_names record type */
- switch(n->type) {
- case AUDIT_TYPE_NORMAL:
- audit_log_format(ab, " nametype=NORMAL");
- break;
- case AUDIT_TYPE_PARENT:
- audit_log_format(ab, " nametype=PARENT");
- break;
- case AUDIT_TYPE_CHILD_DELETE:
- audit_log_format(ab, " nametype=DELETE");
- break;
- case AUDIT_TYPE_CHILD_CREATE:
- audit_log_format(ab, " nametype=CREATE");
- break;
- default:
- audit_log_format(ab, " nametype=UNKNOWN");
- break;
- }
-
- audit_log_fcaps(ab, n);
- audit_log_end(ab);
-}
-
int audit_log_task_context(struct audit_buffer *ab)
{
char *ctx = NULL;
@@ -2322,6 +2188,91 @@ void audit_log_link_denied(const char *operation)
audit_log_end(ab);
}
+/* global counter which is incremented every time something logs in */
+static atomic_t session_id = ATOMIC_INIT(0);
+
+static int audit_set_loginuid_perm(kuid_t loginuid)
+{
+ /* if we are unset, we don't need privs */
+ if (!audit_loginuid_set(current))
+ return 0;
+ /* if AUDIT_FEATURE_LOGINUID_IMMUTABLE means never ever allow a change*/
+ if (is_audit_feature_set(AUDIT_FEATURE_LOGINUID_IMMUTABLE))
+ return -EPERM;
+ /* it is set, you need permission */
+ if (!capable(CAP_AUDIT_CONTROL))
+ return -EPERM;
+ /* reject if this is not an unset and we don't allow that */
+ if (is_audit_feature_set(AUDIT_FEATURE_ONLY_UNSET_LOGINUID)
+ && uid_valid(loginuid))
+ return -EPERM;
+ return 0;
+}
+
+static void audit_log_set_loginuid(kuid_t koldloginuid, kuid_t kloginuid,
+ unsigned int oldsessionid,
+ unsigned int sessionid, int rc)
+{
+ struct audit_buffer *ab;
+ uid_t uid, oldloginuid, loginuid;
+ struct tty_struct *tty;
+
+ if (!audit_enabled)
+ return;
+
+ ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN);
+ if (!ab)
+ return;
+
+ uid = from_kuid(&init_user_ns, task_uid(current));
+ oldloginuid = from_kuid(&init_user_ns, koldloginuid);
+ loginuid = from_kuid(&init_user_ns, kloginuid),
+ tty = audit_get_tty();
+
+ audit_log_format(ab, "pid=%d uid=%u", task_tgid_nr(current), uid);
+ audit_log_task_context(ab);
+ audit_log_format(ab, " old-auid=%u auid=%u tty=%s old-ses=%u ses=%u res=%d",
+ oldloginuid, loginuid, tty ? tty_name(tty) : "(none)",
+ oldsessionid, sessionid, !rc);
+ audit_put_tty(tty);
+ audit_log_end(ab);
+}
+
+/**
+ * audit_set_loginuid - set current task's loginuid
+ * @loginuid: loginuid value
+ *
+ * Returns 0.
+ *
+ * Called (set) from fs/proc/base.c::proc_loginuid_write().
+ */
+int audit_set_loginuid(kuid_t loginuid)
+{
+ unsigned int oldsessionid, sessionid = AUDIT_SID_UNSET;
+ kuid_t oldloginuid;
+ int rc;
+
+ oldloginuid = audit_get_loginuid(current);
+ oldsessionid = audit_get_sessionid(current);
+
+ rc = audit_set_loginuid_perm(loginuid);
+ if (rc)
+ goto out;
+
+ /* are we setting or clearing? */
+ if (uid_valid(loginuid)) {
+ sessionid = (unsigned int)atomic_inc_return(&session_id);
+ if (unlikely(sessionid == AUDIT_SID_UNSET))
+ sessionid = (unsigned int)atomic_inc_return(&session_id);
+ }
+
+ current->sessionid = sessionid;
+ current->loginuid = loginuid;
+out:
+ audit_log_set_loginuid(oldloginuid, loginuid, oldsessionid, sessionid, rc);
+ return rc;
+}
+
/**
* audit_log_end - end one audit record
* @ab: the audit_buffer
diff --git a/kernel/audit.h b/kernel/audit.h
index 91421679a168..958d5b8fc1b3 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -69,6 +69,7 @@ struct audit_cap_data {
kernel_cap_t effective; /* effective set of process */
};
kernel_cap_t ambient;
+ kuid_t rootid;
};
/* When fs/namei.c:getname() is called, we store the pointer in name and bump
@@ -212,15 +213,6 @@ extern bool audit_ever_enabled;
extern void audit_log_session_info(struct audit_buffer *ab);
-extern void audit_copy_inode(struct audit_names *name,
- const struct dentry *dentry,
- struct inode *inode);
-extern void audit_log_cap(struct audit_buffer *ab, char *prefix,
- kernel_cap_t *cap);
-extern void audit_log_name(struct audit_context *context,
- struct audit_names *n, const struct path *path,
- int record_num, int *call_panic);
-
extern int auditd_test_task(struct task_struct *task);
#define AUDIT_INODE_BUCKETS 32
@@ -267,25 +259,52 @@ extern void audit_log_d_path_exe(struct audit_buffer *ab,
extern struct tty_struct *audit_get_tty(void);
extern void audit_put_tty(struct tty_struct *tty);
-/* audit watch functions */
+/* audit watch/mark/tree functions */
#ifdef CONFIG_AUDITSYSCALL
+extern unsigned int audit_serial(void);
+extern int auditsc_get_stamp(struct audit_context *ctx,
+ struct timespec64 *t, unsigned int *serial);
+
extern void audit_put_watch(struct audit_watch *watch);
extern void audit_get_watch(struct audit_watch *watch);
-extern int audit_to_watch(struct audit_krule *krule, char *path, int len, u32 op);
+extern int audit_to_watch(struct audit_krule *krule, char *path, int len,
+ u32 op);
extern int audit_add_watch(struct audit_krule *krule, struct list_head **list);
extern void audit_remove_watch_rule(struct audit_krule *krule);
extern char *audit_watch_path(struct audit_watch *watch);
-extern int audit_watch_compare(struct audit_watch *watch, unsigned long ino, dev_t dev);
+extern int audit_watch_compare(struct audit_watch *watch, unsigned long ino,
+ dev_t dev);
-extern struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule, char *pathname, int len);
+extern struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule,
+ char *pathname, int len);
extern char *audit_mark_path(struct audit_fsnotify_mark *mark);
extern void audit_remove_mark(struct audit_fsnotify_mark *audit_mark);
extern void audit_remove_mark_rule(struct audit_krule *krule);
-extern int audit_mark_compare(struct audit_fsnotify_mark *mark, unsigned long ino, dev_t dev);
+extern int audit_mark_compare(struct audit_fsnotify_mark *mark,
+ unsigned long ino, dev_t dev);
extern int audit_dupe_exe(struct audit_krule *new, struct audit_krule *old);
-extern int audit_exe_compare(struct task_struct *tsk, struct audit_fsnotify_mark *mark);
+extern int audit_exe_compare(struct task_struct *tsk,
+ struct audit_fsnotify_mark *mark);
+
+extern struct audit_chunk *audit_tree_lookup(const struct inode *inode);
+extern void audit_put_chunk(struct audit_chunk *chunk);
+extern bool audit_tree_match(struct audit_chunk *chunk,
+ struct audit_tree *tree);
+extern int audit_make_tree(struct audit_krule *rule, char *pathname, u32 op);
+extern int audit_add_tree_rule(struct audit_krule *rule);
+extern int audit_remove_tree_rule(struct audit_krule *rule);
+extern void audit_trim_trees(void);
+extern int audit_tag_tree(char *old, char *new);
+extern const char *audit_tree_path(struct audit_tree *tree);
+extern void audit_put_tree(struct audit_tree *tree);
+extern void audit_kill_trees(struct audit_context *context);
-#else
+extern int audit_signal_info(int sig, struct task_struct *t);
+extern void audit_filter_inodes(struct task_struct *tsk,
+ struct audit_context *ctx);
+extern struct list_head *audit_killed_trees(void);
+#else /* CONFIG_AUDITSYSCALL */
+#define auditsc_get_stamp(c, t, s) 0
#define audit_put_watch(w) {}
#define audit_get_watch(w) {}
#define audit_to_watch(k, p, l, o) (-EINVAL)
@@ -301,21 +320,7 @@ extern int audit_exe_compare(struct task_struct *tsk, struct audit_fsnotify_mark
#define audit_mark_compare(m, i, d) 0
#define audit_exe_compare(t, m) (-EINVAL)
#define audit_dupe_exe(n, o) (-EINVAL)
-#endif /* CONFIG_AUDITSYSCALL */
-#ifdef CONFIG_AUDITSYSCALL
-extern struct audit_chunk *audit_tree_lookup(const struct inode *inode);
-extern void audit_put_chunk(struct audit_chunk *chunk);
-extern bool audit_tree_match(struct audit_chunk *chunk, struct audit_tree *tree);
-extern int audit_make_tree(struct audit_krule *rule, char *pathname, u32 op);
-extern int audit_add_tree_rule(struct audit_krule *rule);
-extern int audit_remove_tree_rule(struct audit_krule *rule);
-extern void audit_trim_trees(void);
-extern int audit_tag_tree(char *old, char *new);
-extern const char *audit_tree_path(struct audit_tree *tree);
-extern void audit_put_tree(struct audit_tree *tree);
-extern void audit_kill_trees(struct list_head *list);
-#else
#define audit_remove_tree_rule(rule) BUG()
#define audit_add_tree_rule(rule) -EINVAL
#define audit_make_tree(rule, str, op) -EINVAL
@@ -323,8 +328,11 @@ extern void audit_kill_trees(struct list_head *list);
#define audit_put_tree(tree) (void)0
#define audit_tag_tree(old, new) -EINVAL
#define audit_tree_path(rule) "" /* never called */
-#define audit_kill_trees(list) BUG()
-#endif
+#define audit_kill_trees(context) BUG()
+
+#define audit_signal_info(s, t) AUDIT_DISABLED
+#define audit_filter_inodes(t, c) AUDIT_DISABLED
+#endif /* CONFIG_AUDITSYSCALL */
extern char *audit_unpack_string(void **bufp, size_t *remain, size_t len);
@@ -334,14 +342,5 @@ extern u32 audit_sig_sid;
extern int audit_filter(int msgtype, unsigned int listtype);
-#ifdef CONFIG_AUDITSYSCALL
-extern int audit_signal_info(int sig, struct task_struct *t);
-extern void audit_filter_inodes(struct task_struct *tsk, struct audit_context *ctx);
-extern struct list_head *audit_killed_trees(void);
-#else
-#define audit_signal_info(s,t) AUDIT_DISABLED
-#define audit_filter_inodes(t,c) AUDIT_DISABLED
-#endif
-
extern void audit_ctl_lock(void);
extern void audit_ctl_unlock(void);
diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c
index cf4512a33675..37ae95cfb7f4 100644
--- a/kernel/audit_fsnotify.c
+++ b/kernel/audit_fsnotify.c
@@ -127,7 +127,7 @@ static void audit_mark_log_rule_change(struct audit_fsnotify_mark *audit_mark, c
if (!audit_enabled)
return;
- ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE);
+ ab = audit_log_start(audit_context(), GFP_NOFS, AUDIT_CONFIG_CHANGE);
if (unlikely(!ab))
return;
audit_log_session_info(ab);
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index d4af4d97f847..abfb112f26aa 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -524,13 +524,14 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
return 0;
}
-static void audit_tree_log_remove_rule(struct audit_krule *rule)
+static void audit_tree_log_remove_rule(struct audit_context *context,
+ struct audit_krule *rule)
{
struct audit_buffer *ab;
if (!audit_enabled)
return;
- ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
+ ab = audit_log_start(context, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
if (unlikely(!ab))
return;
audit_log_format(ab, "op=remove_rule dir=");
@@ -540,7 +541,7 @@ static void audit_tree_log_remove_rule(struct audit_krule *rule)
audit_log_end(ab);
}
-static void kill_rules(struct audit_tree *tree)
+static void kill_rules(struct audit_context *context, struct audit_tree *tree)
{
struct audit_krule *rule, *next;
struct audit_entry *entry;
@@ -551,7 +552,7 @@ static void kill_rules(struct audit_tree *tree)
list_del_init(&rule->rlist);
if (rule->tree) {
/* not a half-baked one */
- audit_tree_log_remove_rule(rule);
+ audit_tree_log_remove_rule(context, rule);
if (entry->rule.exe)
audit_remove_mark(entry->rule.exe);
rule->tree = NULL;
@@ -633,7 +634,7 @@ static void trim_marked(struct audit_tree *tree)
tree->goner = 1;
spin_unlock(&hash_lock);
mutex_lock(&audit_filter_mutex);
- kill_rules(tree);
+ kill_rules(audit_context(), tree);
list_del_init(&tree->list);
mutex_unlock(&audit_filter_mutex);
prune_one(tree);
@@ -973,8 +974,10 @@ static void audit_schedule_prune(void)
* ... and that one is done if evict_chunk() decides to delay until the end
* of syscall. Runs synchronously.
*/
-void audit_kill_trees(struct list_head *list)
+void audit_kill_trees(struct audit_context *context)
{
+ struct list_head *list = &context->killed_trees;
+
audit_ctl_lock();
mutex_lock(&audit_filter_mutex);
@@ -982,7 +985,7 @@ void audit_kill_trees(struct list_head *list)
struct audit_tree *victim;
victim = list_entry(list->next, struct audit_tree, list);
- kill_rules(victim);
+ kill_rules(context, victim);
list_del_init(&victim->list);
mutex_unlock(&audit_filter_mutex);
@@ -1017,7 +1020,7 @@ static void evict_chunk(struct audit_chunk *chunk)
list_del_init(&owner->same_root);
spin_unlock(&hash_lock);
if (!postponed) {
- kill_rules(owner);
+ kill_rules(audit_context(), owner);
list_move(&owner->list, &prune_list);
need_prune = 1;
} else {
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 20ef9ba134b0..e8d1adeb2223 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -242,7 +242,7 @@ static void audit_watch_log_rule_change(struct audit_krule *r, struct audit_watc
if (!audit_enabled)
return;
- ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE);
+ ab = audit_log_start(audit_context(), GFP_NOFS, AUDIT_CONFIG_CHANGE);
if (!ab)
return;
audit_log_session_info(ab);
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index bf309f2592c4..63f8b3f26fab 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -670,7 +670,7 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule)
data->values[i] = AUDIT_UID_UNSET;
break;
}
- /* fallthrough if set */
+ /* fall through - if set */
default:
data->values[i] = f->val;
}
@@ -1091,7 +1091,7 @@ static void audit_log_rule_change(char *action, struct audit_krule *rule, int re
if (!audit_enabled)
return;
- ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
+ ab = audit_log_start(audit_context(), GFP_KERNEL, AUDIT_CONFIG_CHANGE);
if (!ab)
return;
audit_log_session_info(ab);
@@ -1355,7 +1355,7 @@ int audit_filter(int msgtype, unsigned int listtype)
if (f->lsm_rule) {
security_task_getsecid(current, &sid);
result = security_audit_rule_match(sid,
- f->type, f->op, f->lsm_rule, NULL);
+ f->type, f->op, f->lsm_rule);
}
break;
case AUDIT_EXE:
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 6593a5207fb0..d1eab1d4a930 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -631,9 +631,8 @@ static int audit_filter_rules(struct task_struct *tsk,
need_sid = 0;
}
result = security_audit_rule_match(sid, f->type,
- f->op,
- f->lsm_rule,
- ctx);
+ f->op,
+ f->lsm_rule);
}
break;
case AUDIT_OBJ_USER:
@@ -647,13 +646,17 @@ static int audit_filter_rules(struct task_struct *tsk,
/* Find files that match */
if (name) {
result = security_audit_rule_match(
- name->osid, f->type, f->op,
- f->lsm_rule, ctx);
+ name->osid,
+ f->type,
+ f->op,
+ f->lsm_rule);
} else if (ctx) {
list_for_each_entry(n, &ctx->names_list, list) {
- if (security_audit_rule_match(n->osid, f->type,
- f->op, f->lsm_rule,
- ctx)) {
+ if (security_audit_rule_match(
+ n->osid,
+ f->type,
+ f->op,
+ f->lsm_rule)) {
++result;
break;
}
@@ -664,7 +667,7 @@ static int audit_filter_rules(struct task_struct *tsk,
break;
if (security_audit_rule_match(ctx->ipc.osid,
f->type, f->op,
- f->lsm_rule, ctx))
+ f->lsm_rule))
++result;
}
break;
@@ -1136,6 +1139,32 @@ out:
kfree(buf_head);
}
+void audit_log_cap(struct audit_buffer *ab, char *prefix, kernel_cap_t *cap)
+{
+ int i;
+
+ if (cap_isclear(*cap)) {
+ audit_log_format(ab, " %s=0", prefix);
+ return;
+ }
+ audit_log_format(ab, " %s=", prefix);
+ CAP_FOR_EACH_U32(i)
+ audit_log_format(ab, "%08x", cap->cap[CAP_LAST_U32 - i]);
+}
+
+static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name)
+{
+ if (name->fcap_ver == -1) {
+ audit_log_format(ab, " cap_fe=? cap_fver=? cap_fp=? cap_fi=?");
+ return;
+ }
+ audit_log_cap(ab, "cap_fp", &name->fcap.permitted);
+ audit_log_cap(ab, "cap_fi", &name->fcap.inheritable);
+ audit_log_format(ab, " cap_fe=%d cap_fver=%x cap_frootid=%d",
+ name->fcap.fE, name->fcap_ver,
+ from_kuid(&init_user_ns, name->fcap.rootid));
+}
+
static void show_special(struct audit_context *context, int *call_panic)
{
struct audit_buffer *ab;
@@ -1258,6 +1287,97 @@ static inline int audit_proctitle_rtrim(char *proctitle, int len)
return len;
}
+/*
+ * audit_log_name - produce AUDIT_PATH record from struct audit_names
+ * @context: audit_context for the task
+ * @n: audit_names structure with reportable details
+ * @path: optional path to report instead of audit_names->name
+ * @record_num: record number to report when handling a list of names
+ * @call_panic: optional pointer to int that will be updated if secid fails
+ */
+static void audit_log_name(struct audit_context *context, struct audit_names *n,
+ const struct path *path, int record_num, int *call_panic)
+{
+ struct audit_buffer *ab;
+
+ ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH);
+ if (!ab)
+ return;
+
+ audit_log_format(ab, "item=%d", record_num);
+
+ if (path)
+ audit_log_d_path(ab, " name=", path);
+ else if (n->name) {
+ switch (n->name_len) {
+ case AUDIT_NAME_FULL:
+ /* log the full path */
+ audit_log_format(ab, " name=");
+ audit_log_untrustedstring(ab, n->name->name);
+ break;
+ case 0:
+ /* name was specified as a relative path and the
+ * directory component is the cwd
+ */
+ audit_log_d_path(ab, " name=", &context->pwd);
+ break;
+ default:
+ /* log the name's directory component */
+ audit_log_format(ab, " name=");
+ audit_log_n_untrustedstring(ab, n->name->name,
+ n->name_len);
+ }
+ } else
+ audit_log_format(ab, " name=(null)");
+
+ if (n->ino != AUDIT_INO_UNSET)
+ audit_log_format(ab, " inode=%lu dev=%02x:%02x mode=%#ho ouid=%u ogid=%u rdev=%02x:%02x",
+ n->ino,
+ MAJOR(n->dev),
+ MINOR(n->dev),
+ n->mode,
+ from_kuid(&init_user_ns, n->uid),
+ from_kgid(&init_user_ns, n->gid),
+ MAJOR(n->rdev),
+ MINOR(n->rdev));
+ if (n->osid != 0) {
+ char *ctx = NULL;
+ u32 len;
+
+ if (security_secid_to_secctx(
+ n->osid, &ctx, &len)) {
+ audit_log_format(ab, " osid=%u", n->osid);
+ if (call_panic)
+ *call_panic = 2;
+ } else {
+ audit_log_format(ab, " obj=%s", ctx);
+ security_release_secctx(ctx, len);
+ }
+ }
+
+ /* log the audit_names record type */
+ switch (n->type) {
+ case AUDIT_TYPE_NORMAL:
+ audit_log_format(ab, " nametype=NORMAL");
+ break;
+ case AUDIT_TYPE_PARENT:
+ audit_log_format(ab, " nametype=PARENT");
+ break;
+ case AUDIT_TYPE_CHILD_DELETE:
+ audit_log_format(ab, " nametype=DELETE");
+ break;
+ case AUDIT_TYPE_CHILD_CREATE:
+ audit_log_format(ab, " nametype=CREATE");
+ break;
+ default:
+ audit_log_format(ab, " nametype=UNKNOWN");
+ break;
+ }
+
+ audit_log_fcaps(ab, n);
+ audit_log_end(ab);
+}
+
static void audit_log_proctitle(void)
{
int res;
@@ -1358,6 +1478,9 @@ static void audit_log_exit(void)
audit_log_cap(ab, "pi", &axs->new_pcap.inheritable);
audit_log_cap(ab, "pe", &axs->new_pcap.effective);
audit_log_cap(ab, "pa", &axs->new_pcap.ambient);
+ audit_log_format(ab, " frootid=%d",
+ from_kuid(&init_user_ns,
+ axs->fcap.rootid));
break; }
}
@@ -1444,6 +1567,9 @@ void __audit_free(struct task_struct *tsk)
if (!context)
return;
+ if (!list_empty(&context->killed_trees))
+ audit_kill_trees(context);
+
/* We are called either by do_exit() or the fork() error handling code;
* in the former case tsk == current and in the latter tsk is a
* random task_struct that doesn't doesn't have any meaningful data we
@@ -1460,9 +1586,6 @@ void __audit_free(struct task_struct *tsk)
audit_log_exit();
}
- if (!list_empty(&context->killed_trees))
- audit_kill_trees(&context->killed_trees);
-
audit_set_context(tsk, NULL);
audit_free_context(context);
}
@@ -1537,6 +1660,9 @@ void __audit_syscall_exit(int success, long return_code)
if (!context)
return;
+ if (!list_empty(&context->killed_trees))
+ audit_kill_trees(context);
+
if (!context->dummy && context->in_syscall) {
if (success)
context->return_valid = AUDITSC_SUCCESS;
@@ -1571,9 +1697,6 @@ void __audit_syscall_exit(int success, long return_code)
context->in_syscall = 0;
context->prio = context->state == AUDIT_RECORD_CONTEXT ? ~0ULL : 0;
- if (!list_empty(&context->killed_trees))
- audit_kill_trees(&context->killed_trees);
-
audit_free_names(context);
unroll_tree_refs(context, NULL, 0);
audit_free_aux(context);
@@ -1750,6 +1873,47 @@ void __audit_getname(struct filename *name)
get_fs_pwd(current->fs, &context->pwd);
}
+static inline int audit_copy_fcaps(struct audit_names *name,
+ const struct dentry *dentry)
+{
+ struct cpu_vfs_cap_data caps;
+ int rc;
+
+ if (!dentry)
+ return 0;
+
+ rc = get_vfs_caps_from_disk(dentry, &caps);
+ if (rc)
+ return rc;
+
+ name->fcap.permitted = caps.permitted;
+ name->fcap.inheritable = caps.inheritable;
+ name->fcap.fE = !!(caps.magic_etc & VFS_CAP_FLAGS_EFFECTIVE);
+ name->fcap.rootid = caps.rootid;
+ name->fcap_ver = (caps.magic_etc & VFS_CAP_REVISION_MASK) >>
+ VFS_CAP_REVISION_SHIFT;
+
+ return 0;
+}
+
+/* Copy inode data into an audit_names. */
+void audit_copy_inode(struct audit_names *name, const struct dentry *dentry,
+ struct inode *inode, unsigned int flags)
+{
+ name->ino = inode->i_ino;
+ name->dev = inode->i_sb->s_dev;
+ name->mode = inode->i_mode;
+ name->uid = inode->i_uid;
+ name->gid = inode->i_gid;
+ name->rdev = inode->i_rdev;
+ security_inode_getsecid(inode, &name->osid);
+ if (flags & AUDIT_INODE_NOEVAL) {
+ name->fcap_ver = -1;
+ return;
+ }
+ audit_copy_fcaps(name, dentry);
+}
+
/**
* __audit_inode - store the inode and device from a lookup
* @name: name being audited
@@ -1763,10 +1927,31 @@ void __audit_inode(struct filename *name, const struct dentry *dentry,
struct inode *inode = d_backing_inode(dentry);
struct audit_names *n;
bool parent = flags & AUDIT_INODE_PARENT;
+ struct audit_entry *e;
+ struct list_head *list = &audit_filter_list[AUDIT_FILTER_FS];
+ int i;
if (!context->in_syscall)
return;
+ rcu_read_lock();
+ if (!list_empty(list)) {
+ list_for_each_entry_rcu(e, list, list) {
+ for (i = 0; i < e->rule.field_count; i++) {
+ struct audit_field *f = &e->rule.fields[i];
+
+ if (f->type == AUDIT_FSTYPE
+ && audit_comparator(inode->i_sb->s_magic,
+ f->op, f->val)
+ && e->rule.action == AUDIT_NEVER) {
+ rcu_read_unlock();
+ return;
+ }
+ }
+ }
+ }
+ rcu_read_unlock();
+
if (!name)
goto out_alloc;
@@ -1832,7 +2017,7 @@ out:
n->type = AUDIT_TYPE_NORMAL;
}
handle_path(dentry);
- audit_copy_inode(n, dentry, inode);
+ audit_copy_inode(n, dentry, inode, flags & AUDIT_INODE_NOEVAL);
}
void __audit_file(const struct file *file)
@@ -1875,14 +2060,12 @@ void __audit_inode_child(struct inode *parent,
for (i = 0; i < e->rule.field_count; i++) {
struct audit_field *f = &e->rule.fields[i];
- if (f->type == AUDIT_FSTYPE) {
- if (audit_comparator(parent->i_sb->s_magic,
- f->op, f->val)) {
- if (e->rule.action == AUDIT_NEVER) {
- rcu_read_unlock();
- return;
- }
- }
+ if (f->type == AUDIT_FSTYPE
+ && audit_comparator(parent->i_sb->s_magic,
+ f->op, f->val)
+ && e->rule.action == AUDIT_NEVER) {
+ rcu_read_unlock();
+ return;
}
}
}
@@ -1933,7 +2116,7 @@ void __audit_inode_child(struct inode *parent,
n = audit_alloc_name(context, AUDIT_TYPE_PARENT);
if (!n)
return;
- audit_copy_inode(n, NULL, parent);
+ audit_copy_inode(n, NULL, parent, 0);
}
if (!found_child) {
@@ -1952,7 +2135,7 @@ void __audit_inode_child(struct inode *parent,
}
if (inode)
- audit_copy_inode(found_child, dentry, inode);
+ audit_copy_inode(found_child, dentry, inode, 0);
else
found_child->ino = AUDIT_INO_UNSET;
}
@@ -1983,90 +2166,6 @@ int auditsc_get_stamp(struct audit_context *ctx,
return 1;
}
-/* global counter which is incremented every time something logs in */
-static atomic_t session_id = ATOMIC_INIT(0);
-
-static int audit_set_loginuid_perm(kuid_t loginuid)
-{
- /* if we are unset, we don't need privs */
- if (!audit_loginuid_set(current))
- return 0;
- /* if AUDIT_FEATURE_LOGINUID_IMMUTABLE means never ever allow a change*/
- if (is_audit_feature_set(AUDIT_FEATURE_LOGINUID_IMMUTABLE))
- return -EPERM;
- /* it is set, you need permission */
- if (!capable(CAP_AUDIT_CONTROL))
- return -EPERM;
- /* reject if this is not an unset and we don't allow that */
- if (is_audit_feature_set(AUDIT_FEATURE_ONLY_UNSET_LOGINUID) && uid_valid(loginuid))
- return -EPERM;
- return 0;
-}
-
-static void audit_log_set_loginuid(kuid_t koldloginuid, kuid_t kloginuid,
- unsigned int oldsessionid, unsigned int sessionid,
- int rc)
-{
- struct audit_buffer *ab;
- uid_t uid, oldloginuid, loginuid;
- struct tty_struct *tty;
-
- if (!audit_enabled)
- return;
-
- ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN);
- if (!ab)
- return;
-
- uid = from_kuid(&init_user_ns, task_uid(current));
- oldloginuid = from_kuid(&init_user_ns, koldloginuid);
- loginuid = from_kuid(&init_user_ns, kloginuid),
- tty = audit_get_tty();
-
- audit_log_format(ab, "pid=%d uid=%u", task_tgid_nr(current), uid);
- audit_log_task_context(ab);
- audit_log_format(ab, " old-auid=%u auid=%u tty=%s old-ses=%u ses=%u res=%d",
- oldloginuid, loginuid, tty ? tty_name(tty) : "(none)",
- oldsessionid, sessionid, !rc);
- audit_put_tty(tty);
- audit_log_end(ab);
-}
-
-/**
- * audit_set_loginuid - set current task's audit_context loginuid
- * @loginuid: loginuid value
- *
- * Returns 0.
- *
- * Called (set) from fs/proc/base.c::proc_loginuid_write().
- */
-int audit_set_loginuid(kuid_t loginuid)
-{
- unsigned int oldsessionid, sessionid = AUDIT_SID_UNSET;
- kuid_t oldloginuid;
- int rc;
-
- oldloginuid = audit_get_loginuid(current);
- oldsessionid = audit_get_sessionid(current);
-
- rc = audit_set_loginuid_perm(loginuid);
- if (rc)
- goto out;
-
- /* are we setting or clearing? */
- if (uid_valid(loginuid)) {
- sessionid = (unsigned int)atomic_inc_return(&session_id);
- if (unlikely(sessionid == AUDIT_SID_UNSET))
- sessionid = (unsigned int)atomic_inc_return(&session_id);
- }
-
- current->sessionid = sessionid;
- current->loginuid = loginuid;
-out:
- audit_log_set_loginuid(oldloginuid, loginuid, oldsessionid, sessionid, rc);
- return rc;
-}
-
/**
* __audit_mq_open - record audit data for a POSIX MQ open
* @oflag: open flag
@@ -2355,6 +2454,7 @@ int __audit_log_bprm_fcaps(struct linux_binprm *bprm,
ax->fcap.permitted = vcaps.permitted;
ax->fcap.inheritable = vcaps.inheritable;
ax->fcap.fE = !!(vcaps.magic_etc & VFS_CAP_FLAGS_EFFECTIVE);
+ ax->fcap.rootid = vcaps.rootid;
ax->fcap_ver = (vcaps.magic_etc & VFS_CAP_REVISION_MASK) >> VFS_CAP_REVISION_SHIFT;
ax->old_pcap.permitted = old->cap_permitted;
diff --git a/kernel/capability.c b/kernel/capability.c
index 1e1c0236f55b..1444f3954d75 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -93,9 +93,7 @@ static int cap_validate_magic(cap_user_header_t header, unsigned *tocopy)
break;
case _LINUX_CAPABILITY_VERSION_2:
warn_deprecated_v2();
- /*
- * fall through - v3 is otherwise equivalent to v2.
- */
+ /* fall through - v3 is otherwise equivalent to v2. */
case _LINUX_CAPABILITY_VERSION_3:
*tocopy = _LINUX_CAPABILITY_U32S_3;
break;
@@ -299,7 +297,7 @@ bool has_ns_capability(struct task_struct *t,
int ret;
rcu_read_lock();
- ret = security_capable(__task_cred(t), ns, cap);
+ ret = security_capable(__task_cred(t), ns, cap, CAP_OPT_NONE);
rcu_read_unlock();
return (ret == 0);
@@ -340,7 +338,7 @@ bool has_ns_capability_noaudit(struct task_struct *t,
int ret;
rcu_read_lock();
- ret = security_capable_noaudit(__task_cred(t), ns, cap);
+ ret = security_capable(__task_cred(t), ns, cap, CAP_OPT_NOAUDIT);
rcu_read_unlock();
return (ret == 0);
@@ -363,7 +361,9 @@ bool has_capability_noaudit(struct task_struct *t, int cap)
return has_ns_capability_noaudit(t, &init_user_ns, cap);
}
-static bool ns_capable_common(struct user_namespace *ns, int cap, bool audit)
+static bool ns_capable_common(struct user_namespace *ns,
+ int cap,
+ unsigned int opts)
{
int capable;
@@ -372,8 +372,7 @@ static bool ns_capable_common(struct user_namespace *ns, int cap, bool audit)
BUG();
}
- capable = audit ? security_capable(current_cred(), ns, cap) :
- security_capable_noaudit(current_cred(), ns, cap);
+ capable = security_capable(current_cred(), ns, cap, opts);
if (capable == 0) {
current->flags |= PF_SUPERPRIV;
return true;
@@ -394,7 +393,7 @@ static bool ns_capable_common(struct user_namespace *ns, int cap, bool audit)
*/
bool ns_capable(struct user_namespace *ns, int cap)
{
- return ns_capable_common(ns, cap, true);
+ return ns_capable_common(ns, cap, CAP_OPT_NONE);
}
EXPORT_SYMBOL(ns_capable);
@@ -412,11 +411,30 @@ EXPORT_SYMBOL(ns_capable);
*/
bool ns_capable_noaudit(struct user_namespace *ns, int cap)
{
- return ns_capable_common(ns, cap, false);
+ return ns_capable_common(ns, cap, CAP_OPT_NOAUDIT);
}
EXPORT_SYMBOL(ns_capable_noaudit);
/**
+ * ns_capable_setid - Determine if the current task has a superior capability
+ * in effect, while signalling that this check is being done from within a
+ * setid syscall.
+ * @ns: The usernamespace we want the capability in
+ * @cap: The capability to be tested for
+ *
+ * Return true if the current task has the given superior capability currently
+ * available for use, false if not.
+ *
+ * This sets PF_SUPERPRIV on the task if the capability is available on the
+ * assumption that it's about to be used.
+ */
+bool ns_capable_setid(struct user_namespace *ns, int cap)
+{
+ return ns_capable_common(ns, cap, CAP_OPT_INSETID);
+}
+EXPORT_SYMBOL(ns_capable_setid);
+
+/**
* capable - Determine if the current task has a superior capability in effect
* @cap: The capability to be tested for
*
@@ -448,10 +466,11 @@ EXPORT_SYMBOL(capable);
bool file_ns_capable(const struct file *file, struct user_namespace *ns,
int cap)
{
+
if (WARN_ON_ONCE(!cap_valid(cap)))
return false;
- if (security_capable(file->f_cred, ns, cap) == 0)
+ if (security_capable(file->f_cred, ns, cap, CAP_OPT_NONE) == 0)
return true;
return false;
@@ -500,10 +519,12 @@ bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns)
{
int ret = 0; /* An absent tracer adds no restrictions */
const struct cred *cred;
+
rcu_read_lock();
cred = rcu_dereference(tsk->ptracer_cred);
if (cred)
- ret = security_capable_noaudit(cred, ns, CAP_SYS_PTRACE);
+ ret = security_capable(cred, ns, CAP_SYS_PTRACE,
+ CAP_OPT_NOAUDIT);
rcu_read_unlock();
return (ret == 0);
}
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index cef98502b124..eef24a25bda7 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -197,7 +197,7 @@ static u64 css_serial_nr_next = 1;
*/
static u16 have_fork_callback __read_mostly;
static u16 have_exit_callback __read_mostly;
-static u16 have_free_callback __read_mostly;
+static u16 have_release_callback __read_mostly;
static u16 have_canfork_callback __read_mostly;
/* cgroup namespace for init task */
@@ -3534,6 +3534,16 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
return ret ?: nbytes;
}
+static __poll_t cgroup_file_poll(struct kernfs_open_file *of, poll_table *pt)
+{
+ struct cftype *cft = of->kn->priv;
+
+ if (cft->poll)
+ return cft->poll(of, pt);
+
+ return kernfs_generic_poll(of, pt);
+}
+
static void *cgroup_seqfile_start(struct seq_file *seq, loff_t *ppos)
{
return seq_cft(seq)->seq_start(seq, ppos);
@@ -3572,6 +3582,7 @@ static struct kernfs_ops cgroup_kf_single_ops = {
.open = cgroup_file_open,
.release = cgroup_file_release,
.write = cgroup_file_write,
+ .poll = cgroup_file_poll,
.seq_show = cgroup_seqfile_show,
};
@@ -3580,6 +3591,7 @@ static struct kernfs_ops cgroup_kf_ops = {
.open = cgroup_file_open,
.release = cgroup_file_release,
.write = cgroup_file_write,
+ .poll = cgroup_file_poll,
.seq_start = cgroup_seqfile_start,
.seq_next = cgroup_seqfile_next,
.seq_stop = cgroup_seqfile_stop,
@@ -5314,7 +5326,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
have_fork_callback |= (bool)ss->fork << ss->id;
have_exit_callback |= (bool)ss->exit << ss->id;
- have_free_callback |= (bool)ss->free << ss->id;
+ have_release_callback |= (bool)ss->release << ss->id;
have_canfork_callback |= (bool)ss->can_fork << ss->id;
/* At system boot, before all subsystems have been
@@ -5750,16 +5762,19 @@ void cgroup_exit(struct task_struct *tsk)
} while_each_subsys_mask();
}
-void cgroup_free(struct task_struct *task)
+void cgroup_release(struct task_struct *task)
{
- struct css_set *cset = task_css_set(task);
struct cgroup_subsys *ss;
int ssid;
- do_each_subsys_mask(ss, ssid, have_free_callback) {
- ss->free(task);
+ do_each_subsys_mask(ss, ssid, have_release_callback) {
+ ss->release(task);
} while_each_subsys_mask();
+}
+void cgroup_free(struct task_struct *task)
+{
+ struct css_set *cset = task_css_set(task);
put_css_set(cset);
}
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 479743db6c37..72afd55f70c6 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -203,19 +203,6 @@ static inline struct cpuset *parent_cs(struct cpuset *cs)
return css_cs(cs->css.parent);
}
-#ifdef CONFIG_NUMA
-static inline bool task_has_mempolicy(struct task_struct *task)
-{
- return task->mempolicy;
-}
-#else
-static inline bool task_has_mempolicy(struct task_struct *task)
-{
- return false;
-}
-#endif
-
-
/* bits in struct cpuset flags field */
typedef enum {
CS_ONLINE,
diff --git a/kernel/cgroup/pids.c b/kernel/cgroup/pids.c
index 9829c67ebc0a..c9960baaa14f 100644
--- a/kernel/cgroup/pids.c
+++ b/kernel/cgroup/pids.c
@@ -247,7 +247,7 @@ static void pids_cancel_fork(struct task_struct *task)
pids_uncharge(pids, 1);
}
-static void pids_free(struct task_struct *task)
+static void pids_release(struct task_struct *task)
{
struct pids_cgroup *pids = css_pids(task_css(task, pids_cgrp_id));
@@ -342,7 +342,7 @@ struct cgroup_subsys pids_cgrp_subsys = {
.cancel_attach = pids_cancel_attach,
.can_fork = pids_can_fork,
.cancel_fork = pids_cancel_fork,
- .free = pids_free,
+ .release = pids_release,
.legacy_cftypes = pids_files,
.dfl_cftypes = pids_files,
.threaded = true,
diff --git a/kernel/cgroup/rdma.c b/kernel/cgroup/rdma.c
index d3bbb757ee49..1d75ae7f1cb7 100644
--- a/kernel/cgroup/rdma.c
+++ b/kernel/cgroup/rdma.c
@@ -313,10 +313,8 @@ EXPORT_SYMBOL(rdmacg_try_charge);
* If IB stack wish a device to participate in rdma cgroup resource
* tracking, it must invoke this API to register with rdma cgroup before
* any user space application can start using the RDMA resources.
- * Returns 0 on success or EINVAL when table length given is beyond
- * supported size.
*/
-int rdmacg_register_device(struct rdmacg_device *device)
+void rdmacg_register_device(struct rdmacg_device *device)
{
INIT_LIST_HEAD(&device->dev_node);
INIT_LIST_HEAD(&device->rpools);
@@ -324,7 +322,6 @@ int rdmacg_register_device(struct rdmacg_device *device)
mutex_lock(&rdmacg_mutex);
list_add_tail(&device->dev_node, &rdmacg_devices);
mutex_unlock(&rdmacg_mutex);
- return 0;
}
EXPORT_SYMBOL(rdmacg_register_device);
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index d503d1a9007c..bb95a35e8c2d 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -87,7 +87,6 @@ static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos,
struct cgroup *root, int cpu)
{
struct cgroup_rstat_cpu *rstatc;
- struct cgroup *parent;
if (pos == root)
return NULL;
@@ -115,8 +114,8 @@ static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos,
* However, due to the way we traverse, @pos will be the first
* child in most cases. The only exception is @root.
*/
- parent = cgroup_parent(pos);
- if (parent && rstatc->updated_next) {
+ if (rstatc->updated_next) {
+ struct cgroup *parent = cgroup_parent(pos);
struct cgroup_rstat_cpu *prstatc = cgroup_rstat_cpu(parent, cpu);
struct cgroup_rstat_cpu *nrstatc;
struct cgroup **nextp;
@@ -140,9 +139,12 @@ static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos,
* updated stat.
*/
smp_mb();
+
+ return pos;
}
- return pos;
+ /* only happens for @root */
+ return NULL;
}
/* see cgroup_rstat_flush() */
diff --git a/kernel/configs.c b/kernel/configs.c
index 2df132b20217..b062425ccf8d 100644
--- a/kernel/configs.c
+++ b/kernel/configs.c
@@ -30,37 +30,35 @@
#include <linux/init.h>
#include <linux/uaccess.h>
-/**************************************************/
-/* the actual current config file */
-
/*
- * Define kernel_config_data and kernel_config_data_size, which contains the
- * wrapped and compressed configuration file. The file is first compressed
- * with gzip and then bounded by two eight byte magic numbers to allow
- * extraction from a binary kernel image:
- *
- * IKCFG_ST
- * <image>
- * IKCFG_ED
+ * "IKCFG_ST" and "IKCFG_ED" are used to extract the config data from
+ * a binary kernel image or a module. See scripts/extract-ikconfig.
*/
-#define MAGIC_START "IKCFG_ST"
-#define MAGIC_END "IKCFG_ED"
-#include "config_data.h"
-
-
-#define MAGIC_SIZE (sizeof(MAGIC_START) - 1)
-#define kernel_config_data_size \
- (sizeof(kernel_config_data) - 1 - MAGIC_SIZE * 2)
+asm (
+" .pushsection .rodata, \"a\" \n"
+" .ascii \"IKCFG_ST\" \n"
+" .global kernel_config_data \n"
+"kernel_config_data: \n"
+" .incbin \"kernel/config_data.gz\" \n"
+" .global kernel_config_data_end \n"
+"kernel_config_data_end: \n"
+" .ascii \"IKCFG_ED\" \n"
+" .popsection \n"
+);
#ifdef CONFIG_IKCONFIG_PROC
+extern char kernel_config_data;
+extern char kernel_config_data_end;
+
static ssize_t
ikconfig_read_current(struct file *file, char __user *buf,
size_t len, loff_t * offset)
{
return simple_read_from_buffer(buf, len, offset,
- kernel_config_data + MAGIC_SIZE,
- kernel_config_data_size);
+ &kernel_config_data,
+ &kernel_config_data_end -
+ &kernel_config_data);
}
static const struct file_operations ikconfig_file_ops = {
@@ -79,7 +77,7 @@ static int __init ikconfig_init(void)
if (!entry)
return -ENOMEM;
- proc_set_size(entry, kernel_config_data_size);
+ proc_set_size(entry, &kernel_config_data_end - &kernel_config_data);
return 0;
}
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 933cb3e45b98..093c9f917ed0 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -464,6 +464,8 @@ static int __init crash_save_vmcoreinfo_init(void)
VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
#ifdef CONFIG_HUGETLB_PAGE
VMCOREINFO_NUMBER(HUGETLB_PAGE_DTOR);
+#define PAGE_OFFLINE_MAPCOUNT_VALUE (~PG_offline)
+ VMCOREINFO_NUMBER(PAGE_OFFLINE_MAPCOUNT_VALUE);
#endif
arch_crash_save_vmcoreinfo();
diff --git a/kernel/cred.c b/kernel/cred.c
index 21f4a97085b4..45d77284aed0 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -760,19 +760,6 @@ bool creds_are_invalid(const struct cred *cred)
{
if (cred->magic != CRED_MAGIC)
return true;
-#ifdef CONFIG_SECURITY_SELINUX
- /*
- * cred->security == NULL if security_cred_alloc_blank() or
- * security_prepare_creds() returned an error.
- */
- if (selinux_is_enabled() && cred->security) {
- if ((unsigned long) cred->security < PAGE_SIZE)
- return true;
- if ((*(u32 *)cred->security & 0xffffff00) ==
- (POISON_FREE << 24 | POISON_FREE << 16 | POISON_FREE << 8))
- return true;
- }
-#endif
return false;
}
EXPORT_SYMBOL(creds_are_invalid);
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index ca88b867e7fe..a06ba3013b3b 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -16,7 +16,16 @@ config ARCH_DMA_ADDR_T_64BIT
config ARCH_HAS_DMA_COHERENCE_H
bool
-config HAVE_GENERIC_DMA_COHERENT
+config ARCH_HAS_DMA_SET_MASK
+ bool
+
+config DMA_DECLARE_COHERENT
+ bool
+
+config ARCH_HAS_SETUP_DMA_OPS
+ bool
+
+config ARCH_HAS_TEARDOWN_DMA_OPS
bool
config ARCH_HAS_SYNC_DMA_FOR_DEVICE
@@ -53,3 +62,116 @@ config DMA_REMAP
config DMA_DIRECT_REMAP
bool
select DMA_REMAP
+
+config DMA_CMA
+ bool "DMA Contiguous Memory Allocator"
+ depends on HAVE_DMA_CONTIGUOUS && CMA
+ help
+ This enables the Contiguous Memory Allocator which allows drivers
+ to allocate big physically-contiguous blocks of memory for use with
+ hardware components that do not support I/O map nor scatter-gather.
+
+ You can disable CMA by specifying "cma=0" on the kernel's command
+ line.
+
+ For more information see <include/linux/dma-contiguous.h>.
+ If unsure, say "n".
+
+if DMA_CMA
+comment "Default contiguous memory area size:"
+
+config CMA_SIZE_MBYTES
+ int "Size in Mega Bytes"
+ depends on !CMA_SIZE_SEL_PERCENTAGE
+ default 0 if X86
+ default 16
+ help
+ Defines the size (in MiB) of the default memory area for Contiguous
+ Memory Allocator. If the size of 0 is selected, CMA is disabled by
+ default, but it can be enabled by passing cma=size[MG] to the kernel.
+
+
+config CMA_SIZE_PERCENTAGE
+ int "Percentage of total memory"
+ depends on !CMA_SIZE_SEL_MBYTES
+ default 0 if X86
+ default 10
+ help
+ Defines the size of the default memory area for Contiguous Memory
+ Allocator as a percentage of the total memory in the system.
+ If 0 percent is selected, CMA is disabled by default, but it can be
+ enabled by passing cma=size[MG] to the kernel.
+
+choice
+ prompt "Selected region size"
+ default CMA_SIZE_SEL_MBYTES
+
+config CMA_SIZE_SEL_MBYTES
+ bool "Use mega bytes value only"
+
+config CMA_SIZE_SEL_PERCENTAGE
+ bool "Use percentage value only"
+
+config CMA_SIZE_SEL_MIN
+ bool "Use lower value (minimum)"
+
+config CMA_SIZE_SEL_MAX
+ bool "Use higher value (maximum)"
+
+endchoice
+
+config CMA_ALIGNMENT
+ int "Maximum PAGE_SIZE order of alignment for contiguous buffers"
+ range 4 12
+ default 8
+ help
+ DMA mapping framework by default aligns all buffers to the smallest
+ PAGE_SIZE order which is greater than or equal to the requested buffer
+ size. This works well for buffers up to a few hundreds kilobytes, but
+ for larger buffers it just a memory waste. With this parameter you can
+ specify the maximum PAGE_SIZE order for contiguous buffers. Larger
+ buffers will be aligned only to this specified order. The order is
+ expressed as a power of two multiplied by the PAGE_SIZE.
+
+ For example, if your system defaults to 4KiB pages, the order value
+ of 8 means that the buffers will be aligned up to 1MiB only.
+
+ If unsure, leave the default value "8".
+
+endif
+
+config DMA_API_DEBUG
+ bool "Enable debugging of DMA-API usage"
+ select NEED_DMA_MAP_STATE
+ help
+ Enable this option to debug the use of the DMA API by device drivers.
+ With this option you will be able to detect common bugs in device
+ drivers like double-freeing of DMA mappings or freeing mappings that
+ were never allocated.
+
+ This also attempts to catch cases where a page owned by DMA is
+ accessed by the cpu in a way that could cause data corruption. For
+ example, this enables cow_user_page() to check that the source page is
+ not undergoing DMA.
+
+ This option causes a performance degradation. Use only if you want to
+ debug device drivers and dma interactions.
+
+ If unsure, say N.
+
+config DMA_API_DEBUG_SG
+ bool "Debug DMA scatter-gather usage"
+ default y
+ depends on DMA_API_DEBUG
+ help
+ Perform extra checking that callers of dma_map_sg() have respected the
+ appropriate segment length/boundary limits for the given device when
+ preparing DMA scatterlists.
+
+ This is particularly likely to have been overlooked in cases where the
+ dma_map_sg() API is used for general bulk mapping of pages rather than
+ preparing literal scatter-gather descriptors, where there is a risk of
+ unexpected behaviour from DMA API implementations if the scatterlist
+ is technically out-of-spec.
+
+ If unsure, say N.
diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile
index 72ff6e46aa86..d237cf3dc181 100644
--- a/kernel/dma/Makefile
+++ b/kernel/dma/Makefile
@@ -2,7 +2,7 @@
obj-$(CONFIG_HAS_DMA) += mapping.o direct.o dummy.o
obj-$(CONFIG_DMA_CMA) += contiguous.o
-obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += coherent.o
+obj-$(CONFIG_DMA_DECLARE_COHERENT) += coherent.o
obj-$(CONFIG_DMA_VIRT_OPS) += virt.o
obj-$(CONFIG_DMA_API_DEBUG) += debug.o
obj-$(CONFIG_SWIOTLB) += swiotlb.o
diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c
index 66f0fb7e9a3a..29fd6590dc1e 100644
--- a/kernel/dma/coherent.c
+++ b/kernel/dma/coherent.c
@@ -14,7 +14,6 @@ struct dma_coherent_mem {
dma_addr_t device_base;
unsigned long pfn_base;
int size;
- int flags;
unsigned long *bitmap;
spinlock_t spinlock;
bool use_dev_dma_pfn_offset;
@@ -38,12 +37,12 @@ static inline dma_addr_t dma_get_device_base(struct device *dev,
return mem->device_base;
}
-static int dma_init_coherent_memory(
- phys_addr_t phys_addr, dma_addr_t device_addr, size_t size, int flags,
- struct dma_coherent_mem **mem)
+static int dma_init_coherent_memory(phys_addr_t phys_addr,
+ dma_addr_t device_addr, size_t size,
+ struct dma_coherent_mem **mem)
{
struct dma_coherent_mem *dma_mem = NULL;
- void __iomem *mem_base = NULL;
+ void *mem_base = NULL;
int pages = size >> PAGE_SHIFT;
int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
int ret;
@@ -73,7 +72,6 @@ static int dma_init_coherent_memory(
dma_mem->device_base = device_addr;
dma_mem->pfn_base = PFN_DOWN(phys_addr);
dma_mem->size = pages;
- dma_mem->flags = flags;
spin_lock_init(&dma_mem->spinlock);
*mem = dma_mem;
@@ -110,12 +108,12 @@ static int dma_assign_coherent_memory(struct device *dev,
}
int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
- dma_addr_t device_addr, size_t size, int flags)
+ dma_addr_t device_addr, size_t size)
{
struct dma_coherent_mem *mem;
int ret;
- ret = dma_init_coherent_memory(phys_addr, device_addr, size, flags, &mem);
+ ret = dma_init_coherent_memory(phys_addr, device_addr, size, &mem);
if (ret)
return ret;
@@ -137,29 +135,6 @@ void dma_release_declared_memory(struct device *dev)
}
EXPORT_SYMBOL(dma_release_declared_memory);
-void *dma_mark_declared_memory_occupied(struct device *dev,
- dma_addr_t device_addr, size_t size)
-{
- struct dma_coherent_mem *mem = dev->dma_mem;
- unsigned long flags;
- int pos, err;
-
- size += device_addr & ~PAGE_MASK;
-
- if (!mem)
- return ERR_PTR(-EINVAL);
-
- spin_lock_irqsave(&mem->spinlock, flags);
- pos = PFN_DOWN(device_addr - dma_get_device_base(dev, mem));
- err = bitmap_allocate_region(mem->bitmap, pos, get_order(size));
- spin_unlock_irqrestore(&mem->spinlock, flags);
-
- if (err != 0)
- return ERR_PTR(err);
- return mem->virt_base + (pos << PAGE_SHIFT);
-}
-EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
-
static void *__dma_alloc_from_coherent(struct dma_coherent_mem *mem,
ssize_t size, dma_addr_t *dma_handle)
{
@@ -213,15 +188,7 @@ int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size,
return 0;
*ret = __dma_alloc_from_coherent(mem, size, dma_handle);
- if (*ret)
- return 1;
-
- /*
- * In the case where the allocation can not be satisfied from the
- * per-device area, try to fall back to generic memory if the
- * constraints allow it.
- */
- return mem->flags & DMA_MEMORY_EXCLUSIVE;
+ return 1;
}
void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t *dma_handle)
@@ -350,8 +317,7 @@ static int rmem_dma_device_init(struct reserved_mem *rmem, struct device *dev)
if (!mem) {
ret = dma_init_coherent_memory(rmem->base, rmem->base,
- rmem->size,
- DMA_MEMORY_EXCLUSIVE, &mem);
+ rmem->size, &mem);
if (ret) {
pr_err("Reserved memory: failed to init DMA memory pool at %pa, size %ld MiB\n",
&rmem->base, (unsigned long)rmem->size / SZ_1M);
diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c
index 23cf5361bcf1..45d51e8e26f6 100644
--- a/kernel/dma/debug.c
+++ b/kernel/dma/debug.c
@@ -134,17 +134,6 @@ static u32 nr_total_entries;
/* number of preallocated entries requested by kernel cmdline */
static u32 nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES;
-/* debugfs dentry's for the stuff above */
-static struct dentry *dma_debug_dent __read_mostly;
-static struct dentry *global_disable_dent __read_mostly;
-static struct dentry *error_count_dent __read_mostly;
-static struct dentry *show_all_errors_dent __read_mostly;
-static struct dentry *show_num_errors_dent __read_mostly;
-static struct dentry *num_free_entries_dent __read_mostly;
-static struct dentry *min_free_entries_dent __read_mostly;
-static struct dentry *nr_total_entries_dent __read_mostly;
-static struct dentry *filter_dent __read_mostly;
-
/* per-driver filter related state */
#define NAME_MAX_LEN 64
@@ -840,66 +829,46 @@ static const struct file_operations filter_fops = {
.llseek = default_llseek,
};
-static int dma_debug_fs_init(void)
+static int dump_show(struct seq_file *seq, void *v)
{
- dma_debug_dent = debugfs_create_dir("dma-api", NULL);
- if (!dma_debug_dent) {
- pr_err("can not create debugfs directory\n");
- return -ENOMEM;
- }
+ int idx;
- global_disable_dent = debugfs_create_bool("disabled", 0444,
- dma_debug_dent,
- &global_disable);
- if (!global_disable_dent)
- goto out_err;
-
- error_count_dent = debugfs_create_u32("error_count", 0444,
- dma_debug_dent, &error_count);
- if (!error_count_dent)
- goto out_err;
-
- show_all_errors_dent = debugfs_create_u32("all_errors", 0644,
- dma_debug_dent,
- &show_all_errors);
- if (!show_all_errors_dent)
- goto out_err;
-
- show_num_errors_dent = debugfs_create_u32("num_errors", 0644,
- dma_debug_dent,
- &show_num_errors);
- if (!show_num_errors_dent)
- goto out_err;
-
- num_free_entries_dent = debugfs_create_u32("num_free_entries", 0444,
- dma_debug_dent,
- &num_free_entries);
- if (!num_free_entries_dent)
- goto out_err;
-
- min_free_entries_dent = debugfs_create_u32("min_free_entries", 0444,
- dma_debug_dent,
- &min_free_entries);
- if (!min_free_entries_dent)
- goto out_err;
-
- nr_total_entries_dent = debugfs_create_u32("nr_total_entries", 0444,
- dma_debug_dent,
- &nr_total_entries);
- if (!nr_total_entries_dent)
- goto out_err;
-
- filter_dent = debugfs_create_file("driver_filter", 0644,
- dma_debug_dent, NULL, &filter_fops);
- if (!filter_dent)
- goto out_err;
+ for (idx = 0; idx < HASH_SIZE; idx++) {
+ struct hash_bucket *bucket = &dma_entry_hash[idx];
+ struct dma_debug_entry *entry;
+ unsigned long flags;
+ spin_lock_irqsave(&bucket->lock, flags);
+ list_for_each_entry(entry, &bucket->list, list) {
+ seq_printf(seq,
+ "%s %s %s idx %d P=%llx N=%lx D=%llx L=%llx %s %s\n",
+ dev_name(entry->dev),
+ dev_driver_string(entry->dev),
+ type2name[entry->type], idx,
+ phys_addr(entry), entry->pfn,
+ entry->dev_addr, entry->size,
+ dir2name[entry->direction],
+ maperr2str[entry->map_err_type]);
+ }
+ spin_unlock_irqrestore(&bucket->lock, flags);
+ }
return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(dump);
-out_err:
- debugfs_remove_recursive(dma_debug_dent);
-
- return -ENOMEM;
+static void dma_debug_fs_init(void)
+{
+ struct dentry *dentry = debugfs_create_dir("dma-api", NULL);
+
+ debugfs_create_bool("disabled", 0444, dentry, &global_disable);
+ debugfs_create_u32("error_count", 0444, dentry, &error_count);
+ debugfs_create_u32("all_errors", 0644, dentry, &show_all_errors);
+ debugfs_create_u32("num_errors", 0644, dentry, &show_num_errors);
+ debugfs_create_u32("num_free_entries", 0444, dentry, &num_free_entries);
+ debugfs_create_u32("min_free_entries", 0444, dentry, &min_free_entries);
+ debugfs_create_u32("nr_total_entries", 0444, dentry, &nr_total_entries);
+ debugfs_create_file("driver_filter", 0644, dentry, NULL, &filter_fops);
+ debugfs_create_file("dump", 0444, dentry, NULL, &dump_fops);
}
static int device_dma_allocations(struct device *dev, struct dma_debug_entry **out_entry)
@@ -985,12 +954,7 @@ static int dma_debug_init(void)
spin_lock_init(&dma_entry_hash[i].lock);
}
- if (dma_debug_fs_init() != 0) {
- pr_err("error creating debugfs entries - disabling\n");
- global_disable = true;
-
- return 0;
- }
+ dma_debug_fs_init();
nr_pages = DIV_ROUND_UP(nr_prealloc_entries, DMA_DEBUG_DYNAMIC_ENTRIES);
for (i = 0; i < nr_pages; ++i)
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 355d16acee6d..fcdb23e8d2fc 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -132,8 +132,7 @@ again:
goto again;
}
- if (IS_ENABLED(CONFIG_ZONE_DMA) &&
- phys_mask < DMA_BIT_MASK(32) && !(gfp & GFP_DMA)) {
+ if (IS_ENABLED(CONFIG_ZONE_DMA) && !(gfp & GFP_DMA)) {
gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
goto again;
}
@@ -356,6 +355,20 @@ out_unmap:
}
EXPORT_SYMBOL(dma_direct_map_sg);
+dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr,
+ size_t size, enum dma_data_direction dir, unsigned long attrs)
+{
+ dma_addr_t dma_addr = paddr;
+
+ if (unlikely(!dma_direct_possible(dev, dma_addr, size))) {
+ report_addr(dev, dma_addr, size);
+ return DMA_MAPPING_ERROR;
+ }
+
+ return dma_addr;
+}
+EXPORT_SYMBOL(dma_direct_map_resource);
+
/*
* Because 32-bit DMA masks are so common we expect every architecture to be
* able to satisfy them - either by not supporting more physical memory, or by
@@ -380,3 +393,14 @@ int dma_direct_supported(struct device *dev, u64 mask)
*/
return mask >= __phys_to_dma(dev, min_mask);
}
+
+size_t dma_direct_max_mapping_size(struct device *dev)
+{
+ size_t size = SIZE_MAX;
+
+ /* If SWIOTLB is active, use its maximum mapping size */
+ if (is_swiotlb_active())
+ size = swiotlb_max_mapping_size(dev);
+
+ return size;
+}
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index a11006b6d8e8..c000906348c9 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -207,7 +207,6 @@ int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
}
EXPORT_SYMBOL(dma_mmap_attrs);
-#ifndef ARCH_HAS_DMA_GET_REQUIRED_MASK
static u64 dma_default_get_required_mask(struct device *dev)
{
u32 low_totalram = ((max_pfn - 1) << PAGE_SHIFT);
@@ -238,7 +237,6 @@ u64 dma_get_required_mask(struct device *dev)
return dma_default_get_required_mask(dev);
}
EXPORT_SYMBOL_GPL(dma_get_required_mask);
-#endif
#ifndef arch_dma_alloc_attrs
#define arch_dma_alloc_attrs(dev) (true)
@@ -318,18 +316,23 @@ int dma_supported(struct device *dev, u64 mask)
}
EXPORT_SYMBOL(dma_supported);
-#ifndef HAVE_ARCH_DMA_SET_MASK
+#ifdef CONFIG_ARCH_HAS_DMA_SET_MASK
+void arch_dma_set_mask(struct device *dev, u64 mask);
+#else
+#define arch_dma_set_mask(dev, mask) do { } while (0)
+#endif
+
int dma_set_mask(struct device *dev, u64 mask)
{
if (!dev->dma_mask || !dma_supported(dev, mask))
return -EIO;
+ arch_dma_set_mask(dev, mask);
dma_check_mask(dev, mask);
*dev->dma_mask = mask;
return 0;
}
EXPORT_SYMBOL(dma_set_mask);
-#endif
#ifndef CONFIG_ARCH_HAS_DMA_SET_COHERENT_MASK
int dma_set_coherent_mask(struct device *dev, u64 mask)
@@ -357,3 +360,17 @@ void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
ops->cache_sync(dev, vaddr, size, dir);
}
EXPORT_SYMBOL(dma_cache_sync);
+
+size_t dma_max_mapping_size(struct device *dev)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+ size_t size = SIZE_MAX;
+
+ if (dma_is_direct(ops))
+ size = dma_direct_max_mapping_size(dev);
+ else if (ops && ops->max_mapping_size)
+ size = ops->max_mapping_size(dev);
+
+ return size;
+}
+EXPORT_SYMBOL_GPL(dma_max_mapping_size);
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 1fb6fd68b9c7..9f5851064aea 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -34,6 +34,9 @@
#include <linux/scatterlist.h>
#include <linux/mem_encrypt.h>
#include <linux/set_memory.h>
+#ifdef CONFIG_DEBUG_FS
+#include <linux/debugfs.h>
+#endif
#include <asm/io.h>
#include <asm/dma.h>
@@ -73,6 +76,11 @@ phys_addr_t io_tlb_start, io_tlb_end;
static unsigned long io_tlb_nslabs;
/*
+ * The number of used IO TLB block
+ */
+static unsigned long io_tlb_used;
+
+/*
* This is a free list describing the number of free entries available from
* each index
*/
@@ -385,7 +393,7 @@ void __init swiotlb_exit(void)
}
/*
- * Bounce: copy the swiotlb buffer back to the original dma location
+ * Bounce: copy the swiotlb buffer from or back to the original dma location
*/
static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
size_t size, enum dma_data_direction dir)
@@ -475,6 +483,10 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
* request and allocate a buffer from that IO TLB pool.
*/
spin_lock_irqsave(&io_tlb_lock, flags);
+
+ if (unlikely(nslots > io_tlb_nslabs - io_tlb_used))
+ goto not_found;
+
index = ALIGN(io_tlb_index, stride);
if (index >= io_tlb_nslabs)
index = 0;
@@ -524,6 +536,7 @@ not_found:
dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes)\n", size);
return DMA_MAPPING_ERROR;
found:
+ io_tlb_used += nslots;
spin_unlock_irqrestore(&io_tlb_lock, flags);
/*
@@ -584,6 +597,8 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
*/
for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
io_tlb_list[i] = ++count;
+
+ io_tlb_used -= nslots;
}
spin_unlock_irqrestore(&io_tlb_lock, flags);
}
@@ -651,14 +666,49 @@ bool swiotlb_map(struct device *dev, phys_addr_t *phys, dma_addr_t *dma_addr,
return true;
}
-/*
- * Return whether the given device DMA address mask can be supported
- * properly. For example, if your device can only drive the low 24-bits
- * during bus mastering, then you would pass 0x00ffffff as the mask to
- * this function.
- */
-int
-swiotlb_dma_supported(struct device *hwdev, u64 mask)
+size_t swiotlb_max_mapping_size(struct device *dev)
{
- return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
+ return ((size_t)1 << IO_TLB_SHIFT) * IO_TLB_SEGSIZE;
}
+
+bool is_swiotlb_active(void)
+{
+ /*
+ * When SWIOTLB is initialized, even if io_tlb_start points to physical
+ * address zero, io_tlb_end surely doesn't.
+ */
+ return io_tlb_end != 0;
+}
+
+#ifdef CONFIG_DEBUG_FS
+
+static int __init swiotlb_create_debugfs(void)
+{
+ struct dentry *d_swiotlb_usage;
+ struct dentry *ent;
+
+ d_swiotlb_usage = debugfs_create_dir("swiotlb", NULL);
+
+ if (!d_swiotlb_usage)
+ return -ENOMEM;
+
+ ent = debugfs_create_ulong("io_tlb_nslabs", 0400,
+ d_swiotlb_usage, &io_tlb_nslabs);
+ if (!ent)
+ goto fail;
+
+ ent = debugfs_create_ulong("io_tlb_used", 0400,
+ d_swiotlb_usage, &io_tlb_used);
+ if (!ent)
+ goto fail;
+
+ return 0;
+
+fail:
+ debugfs_remove_recursive(d_swiotlb_usage);
+ return -ENOMEM;
+}
+
+late_initcall(swiotlb_create_debugfs);
+
+#endif
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5f59d848171e..f7086c388781 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5473,7 +5473,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
/* now it's safe to free the pages */
atomic_long_sub(rb->aux_nr_pages, &mmap_user->locked_vm);
- vma->vm_mm->pinned_vm -= rb->aux_mmap_locked;
+ atomic64_sub(rb->aux_mmap_locked, &vma->vm_mm->pinned_vm);
/* this has to be the last one */
rb_free_aux(rb);
@@ -5546,7 +5546,7 @@ again:
*/
atomic_long_sub((size >> PAGE_SHIFT) + 1, &mmap_user->locked_vm);
- vma->vm_mm->pinned_vm -= mmap_locked;
+ atomic64_sub(mmap_locked, &vma->vm_mm->pinned_vm);
free_uid(mmap_user);
out_put:
@@ -5694,7 +5694,7 @@ accounting:
lock_limit = rlimit(RLIMIT_MEMLOCK);
lock_limit >>= PAGE_SHIFT;
- locked = vma->vm_mm->pinned_vm + extra;
+ locked = atomic64_read(&vma->vm_mm->pinned_vm) + extra;
if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() &&
!capable(CAP_IPC_LOCK)) {
@@ -5735,7 +5735,7 @@ accounting:
unlock:
if (!ret) {
atomic_long_add(user_extra, &user->locked_vm);
- vma->vm_mm->pinned_vm += extra;
+ atomic64_add(extra, &vma->vm_mm->pinned_vm);
atomic_inc(&event->mmap_count);
} else if (rb) {
diff --git a/kernel/exit.c b/kernel/exit.c
index 2639a30a8aa5..2166c2d92ddc 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -219,6 +219,7 @@ repeat:
}
write_unlock_irq(&tasklist_lock);
+ cgroup_release(p);
release_thread(p);
call_rcu(&p->rcu, delayed_put_task_struct);
diff --git a/kernel/fork.c b/kernel/fork.c
index 77059b211608..9dcd18aa210b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -77,7 +77,6 @@
#include <linux/blkdev.h>
#include <linux/fs_struct.h>
#include <linux/magic.h>
-#include <linux/sched/mm.h>
#include <linux/perf_event.h>
#include <linux/posix-timers.h>
#include <linux/user-return-notifier.h>
@@ -981,7 +980,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
mm_pgtables_bytes_init(mm);
mm->map_count = 0;
mm->locked_vm = 0;
- mm->pinned_vm = 0;
+ atomic64_set(&mm->pinned_vm, 0);
memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
spin_lock_init(&mm->page_table_lock);
spin_lock_init(&mm->arg_lock);
diff --git a/kernel/gcov/gcc_3_4.c b/kernel/gcov/gcc_3_4.c
index 1e32e66c9563..2dddecbdbe6e 100644
--- a/kernel/gcov/gcc_3_4.c
+++ b/kernel/gcov/gcc_3_4.c
@@ -245,8 +245,7 @@ struct gcov_info *gcov_info_dup(struct gcov_info *info)
/* Duplicate gcov_info. */
active = num_counter_active(info);
- dup = kzalloc(sizeof(struct gcov_info) +
- sizeof(struct gcov_ctr_info) * active, GFP_KERNEL);
+ dup = kzalloc(struct_size(dup, counts, active), GFP_KERNEL);
if (!dup)
return NULL;
dup->version = info->version;
@@ -364,8 +363,7 @@ struct gcov_iterator *gcov_iter_new(struct gcov_info *info)
{
struct gcov_iterator *iter;
- iter = kzalloc(sizeof(struct gcov_iterator) +
- num_counter_active(info) * sizeof(struct type_info),
+ iter = kzalloc(struct_size(iter, type_info, num_counter_active(info)),
GFP_KERNEL);
if (iter)
iter->info = info;
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 4a9191617076..f108a95882c6 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -19,6 +19,7 @@
#include <linux/utsname.h>
#include <linux/sched/signal.h>
#include <linux/sched/debug.h>
+#include <linux/sched/sysctl.h>
#include <trace/events/sched.h>
@@ -126,7 +127,7 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
if (sysctl_hung_task_warnings > 0)
sysctl_hung_task_warnings--;
pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n",
- t->comm, t->pid, timeout);
+ t->comm, t->pid, (jiffies - t->last_switch_time) / HZ);
pr_err(" %s %s %.*s\n",
print_tainted(), init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 99b7dd6982a4..3faef4a77f71 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -1340,6 +1340,17 @@ void irq_chip_mask_parent(struct irq_data *data)
EXPORT_SYMBOL_GPL(irq_chip_mask_parent);
/**
+ * irq_chip_mask_ack_parent - Mask and acknowledge the parent interrupt
+ * @data: Pointer to interrupt specific data
+ */
+void irq_chip_mask_ack_parent(struct irq_data *data)
+{
+ data = data->parent_data;
+ data->chip->irq_mask_ack(data);
+}
+EXPORT_SYMBOL_GPL(irq_chip_mask_ack_parent);
+
+/**
* irq_chip_unmask_parent - Unmask the parent interrupt
* @data: Pointer to interrupt specific data
*/
@@ -1443,6 +1454,7 @@ int irq_chip_set_wake_parent(struct irq_data *data, unsigned int on)
return -ENOSYS;
}
+EXPORT_SYMBOL_GPL(irq_chip_set_wake_parent);
#endif
/**
diff --git a/kernel/irq/irq_sim.c b/kernel/irq/irq_sim.c
index 98a20e1594ce..b992f88c5613 100644
--- a/kernel/irq/irq_sim.c
+++ b/kernel/irq/irq_sim.c
@@ -25,10 +25,22 @@ static void irq_sim_irqunmask(struct irq_data *data)
irq_ctx->enabled = true;
}
+static int irq_sim_set_type(struct irq_data *data, unsigned int type)
+{
+ /* We only support rising and falling edge trigger types. */
+ if (type & ~IRQ_TYPE_EDGE_BOTH)
+ return -EINVAL;
+
+ irqd_set_trigger_type(data, type);
+
+ return 0;
+}
+
static struct irq_chip irq_sim_irqchip = {
.name = "irq_sim",
.irq_mask = irq_sim_irqmask,
.irq_unmask = irq_sim_irqunmask,
+ .irq_set_type = irq_sim_set_type,
};
static void irq_sim_handle_irq(struct irq_work *work)
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 3bf9793d8825..9ed29e4a7dbf 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -743,16 +743,17 @@ static int irq_domain_translate(struct irq_domain *d,
return 0;
}
-static void of_phandle_args_to_fwspec(struct of_phandle_args *irq_data,
+static void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args,
+ unsigned int count,
struct irq_fwspec *fwspec)
{
int i;
- fwspec->fwnode = irq_data->np ? &irq_data->np->fwnode : NULL;
- fwspec->param_count = irq_data->args_count;
+ fwspec->fwnode = np ? &np->fwnode : NULL;
+ fwspec->param_count = count;
- for (i = 0; i < irq_data->args_count; i++)
- fwspec->param[i] = irq_data->args[i];
+ for (i = 0; i < count; i++)
+ fwspec->param[i] = args[i];
}
unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
@@ -850,7 +851,9 @@ unsigned int irq_create_of_mapping(struct of_phandle_args *irq_data)
{
struct irq_fwspec fwspec;
- of_phandle_args_to_fwspec(irq_data, &fwspec);
+ of_phandle_args_to_fwspec(irq_data->np, irq_data->args,
+ irq_data->args_count, &fwspec);
+
return irq_create_fwspec_mapping(&fwspec);
}
EXPORT_SYMBOL_GPL(irq_create_of_mapping);
@@ -942,11 +945,10 @@ int irq_domain_xlate_twocell(struct irq_domain *d, struct device_node *ctrlr,
const u32 *intspec, unsigned int intsize,
irq_hw_number_t *out_hwirq, unsigned int *out_type)
{
- if (WARN_ON(intsize < 2))
- return -EINVAL;
- *out_hwirq = intspec[0];
- *out_type = intspec[1] & IRQ_TYPE_SENSE_MASK;
- return 0;
+ struct irq_fwspec fwspec;
+
+ of_phandle_args_to_fwspec(ctrlr, intspec, intsize, &fwspec);
+ return irq_domain_translate_twocell(d, &fwspec, out_hwirq, out_type);
}
EXPORT_SYMBOL_GPL(irq_domain_xlate_twocell);
@@ -982,6 +984,27 @@ const struct irq_domain_ops irq_domain_simple_ops = {
};
EXPORT_SYMBOL_GPL(irq_domain_simple_ops);
+/**
+ * irq_domain_translate_twocell() - Generic translate for direct two cell
+ * bindings
+ *
+ * Device Tree IRQ specifier translation function which works with two cell
+ * bindings where the cell values map directly to the hwirq number
+ * and linux irq flags.
+ */
+int irq_domain_translate_twocell(struct irq_domain *d,
+ struct irq_fwspec *fwspec,
+ unsigned long *out_hwirq,
+ unsigned int *out_type)
+{
+ if (WARN_ON(fwspec->param_count < 2))
+ return -EINVAL;
+ *out_hwirq = fwspec->param[0];
+ *out_type = fwspec->param[1] & IRQ_TYPE_SENSE_MASK;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(irq_domain_translate_twocell);
+
int irq_domain_alloc_descs(int virq, unsigned int cnt, irq_hw_number_t hwirq,
int node, const struct irq_affinity_desc *affinity)
{
diff --git a/kernel/kcov.c b/kernel/kcov.c
index c2277dbdbfb1..2ee38727844a 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -20,6 +20,7 @@
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/kcov.h>
+#include <linux/refcount.h>
#include <asm/setup.h>
/* Number of 64-bit words written per one comparison: */
@@ -44,7 +45,7 @@ struct kcov {
* - opened file descriptor
* - task with enabled coverage (we can't unwire it from another task)
*/
- atomic_t refcount;
+ refcount_t refcount;
/* The lock protects mode, size, area and t. */
spinlock_t lock;
enum kcov_mode mode;
@@ -228,12 +229,12 @@ EXPORT_SYMBOL(__sanitizer_cov_trace_switch);
static void kcov_get(struct kcov *kcov)
{
- atomic_inc(&kcov->refcount);
+ refcount_inc(&kcov->refcount);
}
static void kcov_put(struct kcov *kcov)
{
- if (atomic_dec_and_test(&kcov->refcount)) {
+ if (refcount_dec_and_test(&kcov->refcount)) {
vfree(kcov->area);
kfree(kcov);
}
@@ -312,7 +313,7 @@ static int kcov_open(struct inode *inode, struct file *filep)
if (!kcov)
return -ENOMEM;
kcov->mode = KCOV_MODE_DISABLED;
- atomic_set(&kcov->refcount, 1);
+ refcount_set(&kcov->refcount, 1);
spin_lock_init(&kcov->lock);
filep->private_data = kcov;
return nonseekable_open(inode, filep);
@@ -444,10 +445,8 @@ static int __init kcov_init(void)
* there is no need to protect it against removal races. The
* use of debugfs_create_file_unsafe() is actually safe here.
*/
- if (!debugfs_create_file_unsafe("kcov", 0600, NULL, NULL, &kcov_fops)) {
- pr_err("failed to create kcov in debugfs\n");
- return -ENOMEM;
- }
+ debugfs_create_file_unsafe("kcov", 0600, NULL, NULL, &kcov_fops);
+
return 0;
}
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 9cf20cc5ebe3..5942eeafb9ac 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -20,6 +20,7 @@
#include <linux/freezer.h>
#include <linux/ptrace.h>
#include <linux/uaccess.h>
+#include <linux/numa.h>
#include <trace/events/sched.h>
static DEFINE_SPINLOCK(kthread_create_lock);
@@ -681,7 +682,7 @@ __kthread_create_worker(int cpu, unsigned int flags,
{
struct kthread_worker *worker;
struct task_struct *task;
- int node = -1;
+ int node = NUMA_NO_NODE;
worker = kzalloc(sizeof(*worker), GFP_KERNEL);
if (!worker)
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index 5b77a7314e01..eb0ee10a1981 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -45,7 +45,12 @@
*/
DEFINE_MUTEX(klp_mutex);
-static LIST_HEAD(klp_patches);
+/*
+ * Actively used patches: enabled or in transition. Note that replaced
+ * or disabled patches are not listed even though the related kernel
+ * module still can be loaded.
+ */
+LIST_HEAD(klp_patches);
static struct kobject *klp_root_kobj;
@@ -82,20 +87,43 @@ static void klp_find_object_module(struct klp_object *obj)
mutex_unlock(&module_mutex);
}
-static bool klp_is_patch_registered(struct klp_patch *patch)
+static bool klp_initialized(void)
+{
+ return !!klp_root_kobj;
+}
+
+static struct klp_func *klp_find_func(struct klp_object *obj,
+ struct klp_func *old_func)
{
- struct klp_patch *mypatch;
+ struct klp_func *func;
- list_for_each_entry(mypatch, &klp_patches, list)
- if (mypatch == patch)
- return true;
+ klp_for_each_func(obj, func) {
+ if ((strcmp(old_func->old_name, func->old_name) == 0) &&
+ (old_func->old_sympos == func->old_sympos)) {
+ return func;
+ }
+ }
- return false;
+ return NULL;
}
-static bool klp_initialized(void)
+static struct klp_object *klp_find_object(struct klp_patch *patch,
+ struct klp_object *old_obj)
{
- return !!klp_root_kobj;
+ struct klp_object *obj;
+
+ klp_for_each_object(patch, obj) {
+ if (klp_is_module(old_obj)) {
+ if (klp_is_module(obj) &&
+ strcmp(old_obj->name, obj->name) == 0) {
+ return obj;
+ }
+ } else if (!klp_is_module(obj)) {
+ return obj;
+ }
+ }
+
+ return NULL;
}
struct klp_find_arg {
@@ -278,170 +306,6 @@ static int klp_write_object_relocations(struct module *pmod,
return ret;
}
-static int __klp_disable_patch(struct klp_patch *patch)
-{
- struct klp_object *obj;
-
- if (WARN_ON(!patch->enabled))
- return -EINVAL;
-
- if (klp_transition_patch)
- return -EBUSY;
-
- /* enforce stacking: only the last enabled patch can be disabled */
- if (!list_is_last(&patch->list, &klp_patches) &&
- list_next_entry(patch, list)->enabled)
- return -EBUSY;
-
- klp_init_transition(patch, KLP_UNPATCHED);
-
- klp_for_each_object(patch, obj)
- if (obj->patched)
- klp_pre_unpatch_callback(obj);
-
- /*
- * Enforce the order of the func->transition writes in
- * klp_init_transition() and the TIF_PATCH_PENDING writes in
- * klp_start_transition(). In the rare case where klp_ftrace_handler()
- * is called shortly after klp_update_patch_state() switches the task,
- * this ensures the handler sees that func->transition is set.
- */
- smp_wmb();
-
- klp_start_transition();
- klp_try_complete_transition();
- patch->enabled = false;
-
- return 0;
-}
-
-/**
- * klp_disable_patch() - disables a registered patch
- * @patch: The registered, enabled patch to be disabled
- *
- * Unregisters the patched functions from ftrace.
- *
- * Return: 0 on success, otherwise error
- */
-int klp_disable_patch(struct klp_patch *patch)
-{
- int ret;
-
- mutex_lock(&klp_mutex);
-
- if (!klp_is_patch_registered(patch)) {
- ret = -EINVAL;
- goto err;
- }
-
- if (!patch->enabled) {
- ret = -EINVAL;
- goto err;
- }
-
- ret = __klp_disable_patch(patch);
-
-err:
- mutex_unlock(&klp_mutex);
- return ret;
-}
-EXPORT_SYMBOL_GPL(klp_disable_patch);
-
-static int __klp_enable_patch(struct klp_patch *patch)
-{
- struct klp_object *obj;
- int ret;
-
- if (klp_transition_patch)
- return -EBUSY;
-
- if (WARN_ON(patch->enabled))
- return -EINVAL;
-
- /* enforce stacking: only the first disabled patch can be enabled */
- if (patch->list.prev != &klp_patches &&
- !list_prev_entry(patch, list)->enabled)
- return -EBUSY;
-
- /*
- * A reference is taken on the patch module to prevent it from being
- * unloaded.
- */
- if (!try_module_get(patch->mod))
- return -ENODEV;
-
- pr_notice("enabling patch '%s'\n", patch->mod->name);
-
- klp_init_transition(patch, KLP_PATCHED);
-
- /*
- * Enforce the order of the func->transition writes in
- * klp_init_transition() and the ops->func_stack writes in
- * klp_patch_object(), so that klp_ftrace_handler() will see the
- * func->transition updates before the handler is registered and the
- * new funcs become visible to the handler.
- */
- smp_wmb();
-
- klp_for_each_object(patch, obj) {
- if (!klp_is_object_loaded(obj))
- continue;
-
- ret = klp_pre_patch_callback(obj);
- if (ret) {
- pr_warn("pre-patch callback failed for object '%s'\n",
- klp_is_module(obj) ? obj->name : "vmlinux");
- goto err;
- }
-
- ret = klp_patch_object(obj);
- if (ret) {
- pr_warn("failed to patch object '%s'\n",
- klp_is_module(obj) ? obj->name : "vmlinux");
- goto err;
- }
- }
-
- klp_start_transition();
- klp_try_complete_transition();
- patch->enabled = true;
-
- return 0;
-err:
- pr_warn("failed to enable patch '%s'\n", patch->mod->name);
-
- klp_cancel_transition();
- return ret;
-}
-
-/**
- * klp_enable_patch() - enables a registered patch
- * @patch: The registered, disabled patch to be enabled
- *
- * Performs the needed symbol lookups and code relocations,
- * then registers the patched functions with ftrace.
- *
- * Return: 0 on success, otherwise error
- */
-int klp_enable_patch(struct klp_patch *patch)
-{
- int ret;
-
- mutex_lock(&klp_mutex);
-
- if (!klp_is_patch_registered(patch)) {
- ret = -EINVAL;
- goto err;
- }
-
- ret = __klp_enable_patch(patch);
-
-err:
- mutex_unlock(&klp_mutex);
- return ret;
-}
-EXPORT_SYMBOL_GPL(klp_enable_patch);
-
/*
* Sysfs Interface
*
@@ -449,11 +313,11 @@ EXPORT_SYMBOL_GPL(klp_enable_patch);
* /sys/kernel/livepatch/<patch>
* /sys/kernel/livepatch/<patch>/enabled
* /sys/kernel/livepatch/<patch>/transition
- * /sys/kernel/livepatch/<patch>/signal
* /sys/kernel/livepatch/<patch>/force
* /sys/kernel/livepatch/<patch>/<object>
* /sys/kernel/livepatch/<patch>/<object>/<function,sympos>
*/
+static int __klp_disable_patch(struct klp_patch *patch);
static ssize_t enabled_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
@@ -470,40 +334,32 @@ static ssize_t enabled_store(struct kobject *kobj, struct kobj_attribute *attr,
mutex_lock(&klp_mutex);
- if (!klp_is_patch_registered(patch)) {
- /*
- * Module with the patch could either disappear meanwhile or is
- * not properly initialized yet.
- */
- ret = -EINVAL;
- goto err;
- }
-
if (patch->enabled == enabled) {
/* already in requested state */
ret = -EINVAL;
- goto err;
+ goto out;
}
- if (patch == klp_transition_patch) {
+ /*
+ * Allow to reverse a pending transition in both ways. It might be
+ * necessary to complete the transition without forcing and breaking
+ * the system integrity.
+ *
+ * Do not allow to re-enable a disabled patch.
+ */
+ if (patch == klp_transition_patch)
klp_reverse_transition();
- } else if (enabled) {
- ret = __klp_enable_patch(patch);
- if (ret)
- goto err;
- } else {
+ else if (!enabled)
ret = __klp_disable_patch(patch);
- if (ret)
- goto err;
- }
+ else
+ ret = -EINVAL;
+out:
mutex_unlock(&klp_mutex);
+ if (ret)
+ return ret;
return count;
-
-err:
- mutex_unlock(&klp_mutex);
- return ret;
}
static ssize_t enabled_show(struct kobject *kobj,
@@ -525,35 +381,6 @@ static ssize_t transition_show(struct kobject *kobj,
patch == klp_transition_patch);
}
-static ssize_t signal_store(struct kobject *kobj, struct kobj_attribute *attr,
- const char *buf, size_t count)
-{
- struct klp_patch *patch;
- int ret;
- bool val;
-
- ret = kstrtobool(buf, &val);
- if (ret)
- return ret;
-
- if (!val)
- return count;
-
- mutex_lock(&klp_mutex);
-
- patch = container_of(kobj, struct klp_patch, kobj);
- if (patch != klp_transition_patch) {
- mutex_unlock(&klp_mutex);
- return -EINVAL;
- }
-
- klp_send_signals();
-
- mutex_unlock(&klp_mutex);
-
- return count;
-}
-
static ssize_t force_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
{
@@ -585,16 +412,129 @@ static ssize_t force_store(struct kobject *kobj, struct kobj_attribute *attr,
static struct kobj_attribute enabled_kobj_attr = __ATTR_RW(enabled);
static struct kobj_attribute transition_kobj_attr = __ATTR_RO(transition);
-static struct kobj_attribute signal_kobj_attr = __ATTR_WO(signal);
static struct kobj_attribute force_kobj_attr = __ATTR_WO(force);
static struct attribute *klp_patch_attrs[] = {
&enabled_kobj_attr.attr,
&transition_kobj_attr.attr,
- &signal_kobj_attr.attr,
&force_kobj_attr.attr,
NULL
};
+static void klp_free_object_dynamic(struct klp_object *obj)
+{
+ kfree(obj->name);
+ kfree(obj);
+}
+
+static struct klp_object *klp_alloc_object_dynamic(const char *name)
+{
+ struct klp_object *obj;
+
+ obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+ if (!obj)
+ return NULL;
+
+ if (name) {
+ obj->name = kstrdup(name, GFP_KERNEL);
+ if (!obj->name) {
+ kfree(obj);
+ return NULL;
+ }
+ }
+
+ INIT_LIST_HEAD(&obj->func_list);
+ obj->dynamic = true;
+
+ return obj;
+}
+
+static void klp_free_func_nop(struct klp_func *func)
+{
+ kfree(func->old_name);
+ kfree(func);
+}
+
+static struct klp_func *klp_alloc_func_nop(struct klp_func *old_func,
+ struct klp_object *obj)
+{
+ struct klp_func *func;
+
+ func = kzalloc(sizeof(*func), GFP_KERNEL);
+ if (!func)
+ return NULL;
+
+ if (old_func->old_name) {
+ func->old_name = kstrdup(old_func->old_name, GFP_KERNEL);
+ if (!func->old_name) {
+ kfree(func);
+ return NULL;
+ }
+ }
+
+ /*
+ * func->new_func is same as func->old_func. These addresses are
+ * set when the object is loaded, see klp_init_object_loaded().
+ */
+ func->old_sympos = old_func->old_sympos;
+ func->nop = true;
+
+ return func;
+}
+
+static int klp_add_object_nops(struct klp_patch *patch,
+ struct klp_object *old_obj)
+{
+ struct klp_object *obj;
+ struct klp_func *func, *old_func;
+
+ obj = klp_find_object(patch, old_obj);
+
+ if (!obj) {
+ obj = klp_alloc_object_dynamic(old_obj->name);
+ if (!obj)
+ return -ENOMEM;
+
+ list_add_tail(&obj->node, &patch->obj_list);
+ }
+
+ klp_for_each_func(old_obj, old_func) {
+ func = klp_find_func(obj, old_func);
+ if (func)
+ continue;
+
+ func = klp_alloc_func_nop(old_func, obj);
+ if (!func)
+ return -ENOMEM;
+
+ list_add_tail(&func->node, &obj->func_list);
+ }
+
+ return 0;
+}
+
+/*
+ * Add 'nop' functions which simply return to the caller to run
+ * the original function. The 'nop' functions are added to a
+ * patch to facilitate a 'replace' mode.
+ */
+static int klp_add_nops(struct klp_patch *patch)
+{
+ struct klp_patch *old_patch;
+ struct klp_object *old_obj;
+
+ klp_for_each_patch(old_patch) {
+ klp_for_each_object(old_patch, old_obj) {
+ int err;
+
+ err = klp_add_object_nops(patch, old_obj);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
static void klp_kobj_release_patch(struct kobject *kobj)
{
struct klp_patch *patch;
@@ -611,6 +551,12 @@ static struct kobj_type klp_ktype_patch = {
static void klp_kobj_release_object(struct kobject *kobj)
{
+ struct klp_object *obj;
+
+ obj = container_of(kobj, struct klp_object, kobj);
+
+ if (obj->dynamic)
+ klp_free_object_dynamic(obj);
}
static struct kobj_type klp_ktype_object = {
@@ -620,6 +566,12 @@ static struct kobj_type klp_ktype_object = {
static void klp_kobj_release_func(struct kobject *kobj)
{
+ struct klp_func *func;
+
+ func = container_of(kobj, struct klp_func, kobj);
+
+ if (func->nop)
+ klp_free_func_nop(func);
}
static struct kobj_type klp_ktype_func = {
@@ -627,17 +579,23 @@ static struct kobj_type klp_ktype_func = {
.sysfs_ops = &kobj_sysfs_ops,
};
-/*
- * Free all functions' kobjects in the array up to some limit. When limit is
- * NULL, all kobjects are freed.
- */
-static void klp_free_funcs_limited(struct klp_object *obj,
- struct klp_func *limit)
+static void __klp_free_funcs(struct klp_object *obj, bool nops_only)
{
- struct klp_func *func;
+ struct klp_func *func, *tmp_func;
+
+ klp_for_each_func_safe(obj, func, tmp_func) {
+ if (nops_only && !func->nop)
+ continue;
+
+ list_del(&func->node);
- for (func = obj->funcs; func->old_name && func != limit; func++)
- kobject_put(&func->kobj);
+ /* Might be called from klp_init_patch() error path. */
+ if (func->kobj_added) {
+ kobject_put(&func->kobj);
+ } else if (func->nop) {
+ klp_free_func_nop(func);
+ }
+ }
}
/* Clean up when a patched object is unloaded */
@@ -647,35 +605,111 @@ static void klp_free_object_loaded(struct klp_object *obj)
obj->mod = NULL;
- klp_for_each_func(obj, func)
- func->old_addr = 0;
+ klp_for_each_func(obj, func) {
+ func->old_func = NULL;
+
+ if (func->nop)
+ func->new_func = NULL;
+ }
}
-/*
- * Free all objects' kobjects in the array up to some limit. When limit is
- * NULL, all kobjects are freed.
- */
-static void klp_free_objects_limited(struct klp_patch *patch,
- struct klp_object *limit)
+static void __klp_free_objects(struct klp_patch *patch, bool nops_only)
{
- struct klp_object *obj;
+ struct klp_object *obj, *tmp_obj;
+
+ klp_for_each_object_safe(patch, obj, tmp_obj) {
+ __klp_free_funcs(obj, nops_only);
+
+ if (nops_only && !obj->dynamic)
+ continue;
+
+ list_del(&obj->node);
- for (obj = patch->objs; obj->funcs && obj != limit; obj++) {
- klp_free_funcs_limited(obj, NULL);
- kobject_put(&obj->kobj);
+ /* Might be called from klp_init_patch() error path. */
+ if (obj->kobj_added) {
+ kobject_put(&obj->kobj);
+ } else if (obj->dynamic) {
+ klp_free_object_dynamic(obj);
+ }
}
}
-static void klp_free_patch(struct klp_patch *patch)
+static void klp_free_objects(struct klp_patch *patch)
+{
+ __klp_free_objects(patch, false);
+}
+
+static void klp_free_objects_dynamic(struct klp_patch *patch)
+{
+ __klp_free_objects(patch, true);
+}
+
+/*
+ * This function implements the free operations that can be called safely
+ * under klp_mutex.
+ *
+ * The operation must be completed by calling klp_free_patch_finish()
+ * outside klp_mutex.
+ */
+void klp_free_patch_start(struct klp_patch *patch)
{
- klp_free_objects_limited(patch, NULL);
if (!list_empty(&patch->list))
list_del(&patch->list);
+
+ klp_free_objects(patch);
+}
+
+/*
+ * This function implements the free part that must be called outside
+ * klp_mutex.
+ *
+ * It must be called after klp_free_patch_start(). And it has to be
+ * the last function accessing the livepatch structures when the patch
+ * gets disabled.
+ */
+static void klp_free_patch_finish(struct klp_patch *patch)
+{
+ /*
+ * Avoid deadlock with enabled_store() sysfs callback by
+ * calling this outside klp_mutex. It is safe because
+ * this is called when the patch gets disabled and it
+ * cannot get enabled again.
+ */
+ if (patch->kobj_added) {
+ kobject_put(&patch->kobj);
+ wait_for_completion(&patch->finish);
+ }
+
+ /* Put the module after the last access to struct klp_patch. */
+ if (!patch->forced)
+ module_put(patch->mod);
+}
+
+/*
+ * The livepatch might be freed from sysfs interface created by the patch.
+ * This work allows to wait until the interface is destroyed in a separate
+ * context.
+ */
+static void klp_free_patch_work_fn(struct work_struct *work)
+{
+ struct klp_patch *patch =
+ container_of(work, struct klp_patch, free_work);
+
+ klp_free_patch_finish(patch);
}
static int klp_init_func(struct klp_object *obj, struct klp_func *func)
{
- if (!func->old_name || !func->new_func)
+ int ret;
+
+ if (!func->old_name)
+ return -EINVAL;
+
+ /*
+ * NOPs get the address later. The patched module must be loaded,
+ * see klp_init_object_loaded().
+ */
+ if (!func->new_func && !func->nop)
return -EINVAL;
if (strlen(func->old_name) >= KSYM_NAME_LEN)
@@ -690,9 +724,13 @@ static int klp_init_func(struct klp_object *obj, struct klp_func *func)
* object. If the user selects 0 for old_sympos, then 1 will be used
* since a unique symbol will be the first occurrence.
*/
- return kobject_init_and_add(&func->kobj, &klp_ktype_func,
- &obj->kobj, "%s,%lu", func->old_name,
- func->old_sympos ? func->old_sympos : 1);
+ ret = kobject_init_and_add(&func->kobj, &klp_ktype_func,
+ &obj->kobj, "%s,%lu", func->old_name,
+ func->old_sympos ? func->old_sympos : 1);
+ if (!ret)
+ func->kobj_added = true;
+
+ return ret;
}
/* Arches may override this to finish any remaining arch-specific tasks */
@@ -721,11 +759,11 @@ static int klp_init_object_loaded(struct klp_patch *patch,
klp_for_each_func(obj, func) {
ret = klp_find_object_symbol(obj->name, func->old_name,
func->old_sympos,
- &func->old_addr);
+ (unsigned long *)&func->old_func);
if (ret)
return ret;
- ret = kallsyms_lookup_size_offset(func->old_addr,
+ ret = kallsyms_lookup_size_offset((unsigned long)func->old_func,
&func->old_size, NULL);
if (!ret) {
pr_err("kallsyms size lookup failed for '%s'\n",
@@ -733,6 +771,9 @@ static int klp_init_object_loaded(struct klp_patch *patch,
return -ENOENT;
}
+ if (func->nop)
+ func->new_func = func->old_func;
+
ret = kallsyms_lookup_size_offset((unsigned long)func->new_func,
&func->new_size, NULL);
if (!ret) {
@@ -751,9 +792,6 @@ static int klp_init_object(struct klp_patch *patch, struct klp_object *obj)
int ret;
const char *name;
- if (!obj->funcs)
- return -EINVAL;
-
if (klp_is_module(obj) && strlen(obj->name) >= MODULE_NAME_LEN)
return -EINVAL;
@@ -767,122 +805,191 @@ static int klp_init_object(struct klp_patch *patch, struct klp_object *obj)
&patch->kobj, "%s", name);
if (ret)
return ret;
+ obj->kobj_added = true;
klp_for_each_func(obj, func) {
ret = klp_init_func(obj, func);
if (ret)
- goto free;
+ return ret;
}
- if (klp_is_object_loaded(obj)) {
+ if (klp_is_object_loaded(obj))
ret = klp_init_object_loaded(patch, obj);
- if (ret)
- goto free;
- }
- return 0;
-
-free:
- klp_free_funcs_limited(obj, func);
- kobject_put(&obj->kobj);
return ret;
}
-static int klp_init_patch(struct klp_patch *patch)
+static int klp_init_patch_early(struct klp_patch *patch)
{
struct klp_object *obj;
- int ret;
+ struct klp_func *func;
if (!patch->objs)
return -EINVAL;
- mutex_lock(&klp_mutex);
-
+ INIT_LIST_HEAD(&patch->list);
+ INIT_LIST_HEAD(&patch->obj_list);
+ patch->kobj_added = false;
patch->enabled = false;
+ patch->forced = false;
+ INIT_WORK(&patch->free_work, klp_free_patch_work_fn);
init_completion(&patch->finish);
+ klp_for_each_object_static(patch, obj) {
+ if (!obj->funcs)
+ return -EINVAL;
+
+ INIT_LIST_HEAD(&obj->func_list);
+ obj->kobj_added = false;
+ list_add_tail(&obj->node, &patch->obj_list);
+
+ klp_for_each_func_static(obj, func) {
+ func->kobj_added = false;
+ list_add_tail(&func->node, &obj->func_list);
+ }
+ }
+
+ if (!try_module_get(patch->mod))
+ return -ENODEV;
+
+ return 0;
+}
+
+static int klp_init_patch(struct klp_patch *patch)
+{
+ struct klp_object *obj;
+ int ret;
+
ret = kobject_init_and_add(&patch->kobj, &klp_ktype_patch,
klp_root_kobj, "%s", patch->mod->name);
- if (ret) {
- mutex_unlock(&klp_mutex);
+ if (ret)
return ret;
+ patch->kobj_added = true;
+
+ if (patch->replace) {
+ ret = klp_add_nops(patch);
+ if (ret)
+ return ret;
}
klp_for_each_object(patch, obj) {
ret = klp_init_object(patch, obj);
if (ret)
- goto free;
+ return ret;
}
list_add_tail(&patch->list, &klp_patches);
- mutex_unlock(&klp_mutex);
-
return 0;
+}
-free:
- klp_free_objects_limited(patch, obj);
+static int __klp_disable_patch(struct klp_patch *patch)
+{
+ struct klp_object *obj;
- mutex_unlock(&klp_mutex);
+ if (WARN_ON(!patch->enabled))
+ return -EINVAL;
- kobject_put(&patch->kobj);
- wait_for_completion(&patch->finish);
+ if (klp_transition_patch)
+ return -EBUSY;
- return ret;
+ klp_init_transition(patch, KLP_UNPATCHED);
+
+ klp_for_each_object(patch, obj)
+ if (obj->patched)
+ klp_pre_unpatch_callback(obj);
+
+ /*
+ * Enforce the order of the func->transition writes in
+ * klp_init_transition() and the TIF_PATCH_PENDING writes in
+ * klp_start_transition(). In the rare case where klp_ftrace_handler()
+ * is called shortly after klp_update_patch_state() switches the task,
+ * this ensures the handler sees that func->transition is set.
+ */
+ smp_wmb();
+
+ klp_start_transition();
+ patch->enabled = false;
+ klp_try_complete_transition();
+
+ return 0;
}
-/**
- * klp_unregister_patch() - unregisters a patch
- * @patch: Disabled patch to be unregistered
- *
- * Frees the data structures and removes the sysfs interface.
- *
- * Return: 0 on success, otherwise error
- */
-int klp_unregister_patch(struct klp_patch *patch)
+static int __klp_enable_patch(struct klp_patch *patch)
{
+ struct klp_object *obj;
int ret;
- mutex_lock(&klp_mutex);
+ if (klp_transition_patch)
+ return -EBUSY;
- if (!klp_is_patch_registered(patch)) {
- ret = -EINVAL;
- goto err;
- }
+ if (WARN_ON(patch->enabled))
+ return -EINVAL;
- if (patch->enabled) {
- ret = -EBUSY;
- goto err;
- }
+ if (!patch->kobj_added)
+ return -EINVAL;
- klp_free_patch(patch);
+ pr_notice("enabling patch '%s'\n", patch->mod->name);
- mutex_unlock(&klp_mutex);
+ klp_init_transition(patch, KLP_PATCHED);
+
+ /*
+ * Enforce the order of the func->transition writes in
+ * klp_init_transition() and the ops->func_stack writes in
+ * klp_patch_object(), so that klp_ftrace_handler() will see the
+ * func->transition updates before the handler is registered and the
+ * new funcs become visible to the handler.
+ */
+ smp_wmb();
+
+ klp_for_each_object(patch, obj) {
+ if (!klp_is_object_loaded(obj))
+ continue;
+
+ ret = klp_pre_patch_callback(obj);
+ if (ret) {
+ pr_warn("pre-patch callback failed for object '%s'\n",
+ klp_is_module(obj) ? obj->name : "vmlinux");
+ goto err;
+ }
+
+ ret = klp_patch_object(obj);
+ if (ret) {
+ pr_warn("failed to patch object '%s'\n",
+ klp_is_module(obj) ? obj->name : "vmlinux");
+ goto err;
+ }
+ }
- kobject_put(&patch->kobj);
- wait_for_completion(&patch->finish);
+ klp_start_transition();
+ patch->enabled = true;
+ klp_try_complete_transition();
return 0;
err:
- mutex_unlock(&klp_mutex);
+ pr_warn("failed to enable patch '%s'\n", patch->mod->name);
+
+ klp_cancel_transition();
return ret;
}
-EXPORT_SYMBOL_GPL(klp_unregister_patch);
/**
- * klp_register_patch() - registers a patch
- * @patch: Patch to be registered
+ * klp_enable_patch() - enable the livepatch
+ * @patch: patch to be enabled
*
- * Initializes the data structure associated with the patch and
- * creates the sysfs interface.
+ * Initializes the data structure associated with the patch, creates the sysfs
+ * interface, performs the needed symbol lookups and code relocations,
+ * registers the patched functions with ftrace.
*
- * There is no need to take the reference on the patch module here. It is done
- * later when the patch is enabled.
+ * This function is supposed to be called from the livepatch module_init()
+ * callback.
*
* Return: 0 on success, otherwise error
*/
-int klp_register_patch(struct klp_patch *patch)
+int klp_enable_patch(struct klp_patch *patch)
{
+ int ret;
+
if (!patch || !patch->mod)
return -EINVAL;
@@ -897,12 +1004,91 @@ int klp_register_patch(struct klp_patch *patch)
if (!klp_have_reliable_stack()) {
pr_err("This architecture doesn't have support for the livepatch consistency model.\n");
- return -ENOSYS;
+ return -EOPNOTSUPP;
+ }
+
+
+ mutex_lock(&klp_mutex);
+
+ ret = klp_init_patch_early(patch);
+ if (ret) {
+ mutex_unlock(&klp_mutex);
+ return ret;
}
- return klp_init_patch(patch);
+ ret = klp_init_patch(patch);
+ if (ret)
+ goto err;
+
+ ret = __klp_enable_patch(patch);
+ if (ret)
+ goto err;
+
+ mutex_unlock(&klp_mutex);
+
+ return 0;
+
+err:
+ klp_free_patch_start(patch);
+
+ mutex_unlock(&klp_mutex);
+
+ klp_free_patch_finish(patch);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(klp_enable_patch);
+
+/*
+ * This function removes replaced patches.
+ *
+ * We could be pretty aggressive here. It is called in the situation where
+ * these structures are no longer accessible. All functions are redirected
+ * by the klp_transition_patch. They use either a new code or they are in
+ * the original code because of the special nop function patches.
+ *
+ * The only exception is when the transition was forced. In this case,
+ * klp_ftrace_handler() might still see the replaced patch on the stack.
+ * Fortunately, it is carefully designed to work with removed functions
+ * thanks to RCU. We only have to keep the patches on the system. Also
+ * this is handled transparently by patch->module_put.
+ */
+void klp_discard_replaced_patches(struct klp_patch *new_patch)
+{
+ struct klp_patch *old_patch, *tmp_patch;
+
+ klp_for_each_patch_safe(old_patch, tmp_patch) {
+ if (old_patch == new_patch)
+ return;
+
+ old_patch->enabled = false;
+ klp_unpatch_objects(old_patch);
+ klp_free_patch_start(old_patch);
+ schedule_work(&old_patch->free_work);
+ }
+}
+
+/*
+ * This function removes the dynamically allocated 'nop' functions.
+ *
+ * We could be pretty aggressive. NOPs do not change the existing
+ * behavior except for adding unnecessary delay by the ftrace handler.
+ *
+ * It is safe even when the transition was forced. The ftrace handler
+ * will see a valid ops->func_stack entry thanks to RCU.
+ *
+ * We could even free the NOPs structures. They must be the last entry
+ * in ops->func_stack. Therefore unregister_ftrace_function() is called.
+ * It does the same as klp_synchronize_transition() to make sure that
+ * nobody is inside the ftrace handler once the operation finishes.
+ *
+ * IMPORTANT: It must be called right after removing the replaced patches!
+ */
+void klp_discard_nops(struct klp_patch *new_patch)
+{
+ klp_unpatch_objects_dynamic(klp_transition_patch);
+ klp_free_objects_dynamic(klp_transition_patch);
}
-EXPORT_SYMBOL_GPL(klp_register_patch);
/*
* Remove parts of patches that touch a given kernel module. The list of
@@ -915,7 +1101,7 @@ static void klp_cleanup_module_patches_limited(struct module *mod,
struct klp_patch *patch;
struct klp_object *obj;
- list_for_each_entry(patch, &klp_patches, list) {
+ klp_for_each_patch(patch) {
if (patch == limit)
break;
@@ -923,21 +1109,14 @@ static void klp_cleanup_module_patches_limited(struct module *mod,
if (!klp_is_module(obj) || strcmp(obj->name, mod->name))
continue;
- /*
- * Only unpatch the module if the patch is enabled or
- * is in transition.
- */
- if (patch->enabled || patch == klp_transition_patch) {
+ if (patch != klp_transition_patch)
+ klp_pre_unpatch_callback(obj);
- if (patch != klp_transition_patch)
- klp_pre_unpatch_callback(obj);
+ pr_notice("reverting patch '%s' on unloading module '%s'\n",
+ patch->mod->name, obj->mod->name);
+ klp_unpatch_object(obj);
- pr_notice("reverting patch '%s' on unloading module '%s'\n",
- patch->mod->name, obj->mod->name);
- klp_unpatch_object(obj);
-
- klp_post_unpatch_callback(obj);
- }
+ klp_post_unpatch_callback(obj);
klp_free_object_loaded(obj);
break;
@@ -962,7 +1141,7 @@ int klp_module_coming(struct module *mod)
*/
mod->klp_alive = true;
- list_for_each_entry(patch, &klp_patches, list) {
+ klp_for_each_patch(patch) {
klp_for_each_object(patch, obj) {
if (!klp_is_module(obj) || strcmp(obj->name, mod->name))
continue;
@@ -976,13 +1155,6 @@ int klp_module_coming(struct module *mod)
goto err;
}
- /*
- * Only patch the module if the patch is enabled or is
- * in transition.
- */
- if (!patch->enabled && patch != klp_transition_patch)
- break;
-
pr_notice("applying patch '%s' to loading module '%s'\n",
patch->mod->name, obj->mod->name);
diff --git a/kernel/livepatch/core.h b/kernel/livepatch/core.h
index 48a83d4364cf..ec43a40b853f 100644
--- a/kernel/livepatch/core.h
+++ b/kernel/livepatch/core.h
@@ -5,6 +5,17 @@
#include <linux/livepatch.h>
extern struct mutex klp_mutex;
+extern struct list_head klp_patches;
+
+#define klp_for_each_patch_safe(patch, tmp_patch) \
+ list_for_each_entry_safe(patch, tmp_patch, &klp_patches, list)
+
+#define klp_for_each_patch(patch) \
+ list_for_each_entry(patch, &klp_patches, list)
+
+void klp_free_patch_start(struct klp_patch *patch);
+void klp_discard_replaced_patches(struct klp_patch *new_patch);
+void klp_discard_nops(struct klp_patch *new_patch);
static inline bool klp_is_object_loaded(struct klp_object *obj)
{
diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c
index 7702cb4064fc..99cb3ad05eb4 100644
--- a/kernel/livepatch/patch.c
+++ b/kernel/livepatch/patch.c
@@ -34,7 +34,7 @@
static LIST_HEAD(klp_ops);
-struct klp_ops *klp_find_ops(unsigned long old_addr)
+struct klp_ops *klp_find_ops(void *old_func)
{
struct klp_ops *ops;
struct klp_func *func;
@@ -42,7 +42,7 @@ struct klp_ops *klp_find_ops(unsigned long old_addr)
list_for_each_entry(ops, &klp_ops, node) {
func = list_first_entry(&ops->func_stack, struct klp_func,
stack_node);
- if (func->old_addr == old_addr)
+ if (func->old_func == old_func)
return ops;
}
@@ -118,7 +118,15 @@ static void notrace klp_ftrace_handler(unsigned long ip,
}
}
+ /*
+ * NOPs are used to replace existing patches with original code.
+ * Do nothing! Setting pc would cause an infinite loop.
+ */
+ if (func->nop)
+ goto unlock;
+
klp_arch_set_pc(regs, (unsigned long)func->new_func);
+
unlock:
preempt_enable_notrace();
}
@@ -142,17 +150,18 @@ static void klp_unpatch_func(struct klp_func *func)
if (WARN_ON(!func->patched))
return;
- if (WARN_ON(!func->old_addr))
+ if (WARN_ON(!func->old_func))
return;
- ops = klp_find_ops(func->old_addr);
+ ops = klp_find_ops(func->old_func);
if (WARN_ON(!ops))
return;
if (list_is_singular(&ops->func_stack)) {
unsigned long ftrace_loc;
- ftrace_loc = klp_get_ftrace_location(func->old_addr);
+ ftrace_loc =
+ klp_get_ftrace_location((unsigned long)func->old_func);
if (WARN_ON(!ftrace_loc))
return;
@@ -174,17 +183,18 @@ static int klp_patch_func(struct klp_func *func)
struct klp_ops *ops;
int ret;
- if (WARN_ON(!func->old_addr))
+ if (WARN_ON(!func->old_func))
return -EINVAL;
if (WARN_ON(func->patched))
return -EINVAL;
- ops = klp_find_ops(func->old_addr);
+ ops = klp_find_ops(func->old_func);
if (!ops) {
unsigned long ftrace_loc;
- ftrace_loc = klp_get_ftrace_location(func->old_addr);
+ ftrace_loc =
+ klp_get_ftrace_location((unsigned long)func->old_func);
if (!ftrace_loc) {
pr_err("failed to find location for function '%s'\n",
func->old_name);
@@ -236,15 +246,26 @@ err:
return ret;
}
-void klp_unpatch_object(struct klp_object *obj)
+static void __klp_unpatch_object(struct klp_object *obj, bool nops_only)
{
struct klp_func *func;
- klp_for_each_func(obj, func)
+ klp_for_each_func(obj, func) {
+ if (nops_only && !func->nop)
+ continue;
+
if (func->patched)
klp_unpatch_func(func);
+ }
- obj->patched = false;
+ if (obj->dynamic || !nops_only)
+ obj->patched = false;
+}
+
+
+void klp_unpatch_object(struct klp_object *obj)
+{
+ __klp_unpatch_object(obj, false);
}
int klp_patch_object(struct klp_object *obj)
@@ -267,11 +288,21 @@ int klp_patch_object(struct klp_object *obj)
return 0;
}
-void klp_unpatch_objects(struct klp_patch *patch)
+static void __klp_unpatch_objects(struct klp_patch *patch, bool nops_only)
{
struct klp_object *obj;
klp_for_each_object(patch, obj)
if (obj->patched)
- klp_unpatch_object(obj);
+ __klp_unpatch_object(obj, nops_only);
+}
+
+void klp_unpatch_objects(struct klp_patch *patch)
+{
+ __klp_unpatch_objects(patch, false);
+}
+
+void klp_unpatch_objects_dynamic(struct klp_patch *patch)
+{
+ __klp_unpatch_objects(patch, true);
}
diff --git a/kernel/livepatch/patch.h b/kernel/livepatch/patch.h
index e72d8250d04b..d5f2fbe373e0 100644
--- a/kernel/livepatch/patch.h
+++ b/kernel/livepatch/patch.h
@@ -10,7 +10,7 @@
* struct klp_ops - structure for tracking registered ftrace ops structs
*
* A single ftrace_ops is shared between all enabled replacement functions
- * (klp_func structs) which have the same old_addr. This allows the switch
+ * (klp_func structs) which have the same old_func. This allows the switch
* between function versions to happen instantaneously by updating the klp_ops
* struct's func_stack list. The winner is the klp_func at the top of the
* func_stack (front of the list).
@@ -25,10 +25,11 @@ struct klp_ops {
struct ftrace_ops fops;
};
-struct klp_ops *klp_find_ops(unsigned long old_addr);
+struct klp_ops *klp_find_ops(void *old_func);
int klp_patch_object(struct klp_object *obj);
void klp_unpatch_object(struct klp_object *obj);
void klp_unpatch_objects(struct klp_patch *patch);
+void klp_unpatch_objects_dynamic(struct klp_patch *patch);
#endif /* _LIVEPATCH_PATCH_H */
diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c
index 304d5eb8a98c..9c89ae8b337a 100644
--- a/kernel/livepatch/transition.c
+++ b/kernel/livepatch/transition.c
@@ -29,11 +29,13 @@
#define MAX_STACK_ENTRIES 100
#define STACK_ERR_BUF_SIZE 128
+#define SIGNALS_TIMEOUT 15
+
struct klp_patch *klp_transition_patch;
static int klp_target_state = KLP_UNDEFINED;
-static bool klp_forced = false;
+static unsigned int klp_signals_cnt;
/*
* This work can be performed periodically to finish patching or unpatching any
@@ -87,6 +89,11 @@ static void klp_complete_transition(void)
klp_transition_patch->mod->name,
klp_target_state == KLP_PATCHED ? "patching" : "unpatching");
+ if (klp_transition_patch->replace && klp_target_state == KLP_PATCHED) {
+ klp_discard_replaced_patches(klp_transition_patch);
+ klp_discard_nops(klp_transition_patch);
+ }
+
if (klp_target_state == KLP_UNPATCHED) {
/*
* All tasks have transitioned to KLP_UNPATCHED so we can now
@@ -136,13 +143,6 @@ static void klp_complete_transition(void)
pr_notice("'%s': %s complete\n", klp_transition_patch->mod->name,
klp_target_state == KLP_PATCHED ? "patching" : "unpatching");
- /*
- * klp_forced set implies unbounded increase of module's ref count if
- * the module is disabled/enabled in a loop.
- */
- if (!klp_forced && klp_target_state == KLP_UNPATCHED)
- module_put(klp_transition_patch->mod);
-
klp_target_state = KLP_UNDEFINED;
klp_transition_patch = NULL;
}
@@ -224,11 +224,11 @@ static int klp_check_stack_func(struct klp_func *func,
* Check for the to-be-patched function
* (the previous func).
*/
- ops = klp_find_ops(func->old_addr);
+ ops = klp_find_ops(func->old_func);
if (list_is_singular(&ops->func_stack)) {
/* original function */
- func_addr = func->old_addr;
+ func_addr = (unsigned long)func->old_func;
func_size = func->old_size;
} else {
/* previously patched function */
@@ -348,6 +348,47 @@ done:
}
/*
+ * Sends a fake signal to all non-kthread tasks with TIF_PATCH_PENDING set.
+ * Kthreads with TIF_PATCH_PENDING set are woken up.
+ */
+static void klp_send_signals(void)
+{
+ struct task_struct *g, *task;
+
+ if (klp_signals_cnt == SIGNALS_TIMEOUT)
+ pr_notice("signaling remaining tasks\n");
+
+ read_lock(&tasklist_lock);
+ for_each_process_thread(g, task) {
+ if (!klp_patch_pending(task))
+ continue;
+
+ /*
+ * There is a small race here. We could see TIF_PATCH_PENDING
+ * set and decide to wake up a kthread or send a fake signal.
+ * Meanwhile the task could migrate itself and the action
+ * would be meaningless. It is not serious though.
+ */
+ if (task->flags & PF_KTHREAD) {
+ /*
+ * Wake up a kthread which sleeps interruptedly and
+ * still has not been migrated.
+ */
+ wake_up_state(task, TASK_INTERRUPTIBLE);
+ } else {
+ /*
+ * Send fake signal to all non-kthread tasks which are
+ * still not migrated.
+ */
+ spin_lock_irq(&task->sighand->siglock);
+ signal_wake_up(task, 0);
+ spin_unlock_irq(&task->sighand->siglock);
+ }
+ }
+ read_unlock(&tasklist_lock);
+}
+
+/*
* Try to switch all remaining tasks to the target patch state by walking the
* stacks of sleeping tasks and looking for any to-be-patched or
* to-be-unpatched functions. If such functions are found, the task can't be
@@ -359,6 +400,7 @@ void klp_try_complete_transition(void)
{
unsigned int cpu;
struct task_struct *g, *task;
+ struct klp_patch *patch;
bool complete = true;
WARN_ON_ONCE(klp_target_state == KLP_UNDEFINED);
@@ -396,6 +438,10 @@ void klp_try_complete_transition(void)
put_online_cpus();
if (!complete) {
+ if (klp_signals_cnt && !(klp_signals_cnt % SIGNALS_TIMEOUT))
+ klp_send_signals();
+ klp_signals_cnt++;
+
/*
* Some tasks weren't able to be switched over. Try again
* later and/or wait for other methods like kernel exit
@@ -407,7 +453,18 @@ void klp_try_complete_transition(void)
}
/* we're done, now cleanup the data structures */
+ patch = klp_transition_patch;
klp_complete_transition();
+
+ /*
+ * It would make more sense to free the patch in
+ * klp_complete_transition() but it is called also
+ * from klp_cancel_transition().
+ */
+ if (!patch->enabled) {
+ klp_free_patch_start(patch);
+ schedule_work(&patch->free_work);
+ }
}
/*
@@ -446,6 +503,8 @@ void klp_start_transition(void)
if (task->patch_state != klp_target_state)
set_tsk_thread_flag(task, TIF_PATCH_PENDING);
}
+
+ klp_signals_cnt = 0;
}
/*
@@ -569,47 +628,6 @@ void klp_copy_process(struct task_struct *child)
}
/*
- * Sends a fake signal to all non-kthread tasks with TIF_PATCH_PENDING set.
- * Kthreads with TIF_PATCH_PENDING set are woken up. Only admin can request this
- * action currently.
- */
-void klp_send_signals(void)
-{
- struct task_struct *g, *task;
-
- pr_notice("signaling remaining tasks\n");
-
- read_lock(&tasklist_lock);
- for_each_process_thread(g, task) {
- if (!klp_patch_pending(task))
- continue;
-
- /*
- * There is a small race here. We could see TIF_PATCH_PENDING
- * set and decide to wake up a kthread or send a fake signal.
- * Meanwhile the task could migrate itself and the action
- * would be meaningless. It is not serious though.
- */
- if (task->flags & PF_KTHREAD) {
- /*
- * Wake up a kthread which sleeps interruptedly and
- * still has not been migrated.
- */
- wake_up_state(task, TASK_INTERRUPTIBLE);
- } else {
- /*
- * Send fake signal to all non-kthread tasks which are
- * still not migrated.
- */
- spin_lock_irq(&task->sighand->siglock);
- signal_wake_up(task, 0);
- spin_unlock_irq(&task->sighand->siglock);
- }
- }
- read_unlock(&tasklist_lock);
-}
-
-/*
* Drop TIF_PATCH_PENDING of all tasks on admin's request. This forces an
* existing transition to finish.
*
@@ -620,6 +638,7 @@ void klp_send_signals(void)
*/
void klp_force_transition(void)
{
+ struct klp_patch *patch;
struct task_struct *g, *task;
unsigned int cpu;
@@ -633,5 +652,6 @@ void klp_force_transition(void)
for_each_possible_cpu(cpu)
klp_update_patch_state(idle_task(cpu));
- klp_forced = true;
+ klp_for_each_patch(patch)
+ patch->forced = true;
}
diff --git a/kernel/livepatch/transition.h b/kernel/livepatch/transition.h
index f9d0bc016067..322db16233de 100644
--- a/kernel/livepatch/transition.h
+++ b/kernel/livepatch/transition.h
@@ -11,7 +11,6 @@ void klp_cancel_transition(void);
void klp_start_transition(void);
void klp_try_complete_transition(void);
void klp_reverse_transition(void);
-void klp_send_signals(void);
void klp_force_transition(void);
#endif /* _LIVEPATCH_TRANSITION_H */
diff --git a/kernel/module.c b/kernel/module.c
index 2ad1b5239910..0b9aa8ab89f0 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2719,11 +2719,7 @@ static void dynamic_debug_setup(struct module *mod, struct _ddebug *debug, unsig
{
if (!debug)
return;
-#ifdef CONFIG_DYNAMIC_DEBUG
- if (ddebug_add_module(debug, num, mod->name))
- pr_err("dynamic debug error adding module: %s\n",
- debug->modname);
-#endif
+ ddebug_add_module(debug, num, mod->name);
}
static void dynamic_debug_remove(struct module *mod, struct _ddebug *debug)
diff --git a/kernel/panic.c b/kernel/panic.c
index f121e6ba7e11..0ae0d7332f12 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -642,16 +642,14 @@ static int clear_warn_once_set(void *data, u64 val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(clear_warn_once_fops,
- NULL,
- clear_warn_once_set,
- "%lld\n");
+DEFINE_DEBUGFS_ATTRIBUTE(clear_warn_once_fops, NULL, clear_warn_once_set,
+ "%lld\n");
static __init int register_warn_debugfs(void)
{
/* Don't care about failure */
- debugfs_create_file("clear_warn_once", 0200, NULL,
- NULL, &clear_warn_once_fops);
+ debugfs_create_file_unsafe("clear_warn_once", 0200, NULL, NULL,
+ &clear_warn_once_fops);
return 0;
}
diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c
index d9dc2c38764a..7d66ee68aaaf 100644
--- a/kernel/power/energy_model.c
+++ b/kernel/power/energy_model.c
@@ -10,6 +10,7 @@
#include <linux/cpu.h>
#include <linux/cpumask.h>
+#include <linux/debugfs.h>
#include <linux/energy_model.h>
#include <linux/sched/topology.h>
#include <linux/slab.h>
@@ -23,6 +24,60 @@ static DEFINE_PER_CPU(struct em_perf_domain *, em_data);
*/
static DEFINE_MUTEX(em_pd_mutex);
+#ifdef CONFIG_DEBUG_FS
+static struct dentry *rootdir;
+
+static void em_debug_create_cs(struct em_cap_state *cs, struct dentry *pd)
+{
+ struct dentry *d;
+ char name[24];
+
+ snprintf(name, sizeof(name), "cs:%lu", cs->frequency);
+
+ /* Create per-cs directory */
+ d = debugfs_create_dir(name, pd);
+ debugfs_create_ulong("frequency", 0444, d, &cs->frequency);
+ debugfs_create_ulong("power", 0444, d, &cs->power);
+ debugfs_create_ulong("cost", 0444, d, &cs->cost);
+}
+
+static int em_debug_cpus_show(struct seq_file *s, void *unused)
+{
+ seq_printf(s, "%*pbl\n", cpumask_pr_args(to_cpumask(s->private)));
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(em_debug_cpus);
+
+static void em_debug_create_pd(struct em_perf_domain *pd, int cpu)
+{
+ struct dentry *d;
+ char name[8];
+ int i;
+
+ snprintf(name, sizeof(name), "pd%d", cpu);
+
+ /* Create the directory of the performance domain */
+ d = debugfs_create_dir(name, rootdir);
+
+ debugfs_create_file("cpus", 0444, d, pd->cpus, &em_debug_cpus_fops);
+
+ /* Create a sub-directory for each capacity state */
+ for (i = 0; i < pd->nr_cap_states; i++)
+ em_debug_create_cs(&pd->table[i], d);
+}
+
+static int __init em_debug_init(void)
+{
+ /* Create /sys/kernel/debug/energy_model directory */
+ rootdir = debugfs_create_dir("energy_model", NULL);
+
+ return 0;
+}
+core_initcall(em_debug_init);
+#else /* CONFIG_DEBUG_FS */
+static void em_debug_create_pd(struct em_perf_domain *pd, int cpu) {}
+#endif
static struct em_perf_domain *em_create_pd(cpumask_t *span, int nr_states,
struct em_data_callback *cb)
{
@@ -102,6 +157,8 @@ static struct em_perf_domain *em_create_pd(cpumask_t *span, int nr_states,
pd->nr_cap_states = nr_states;
cpumask_copy(to_cpumask(pd->cpus), span);
+ em_debug_create_pd(pd, cpu);
+
return pd;
free_cs_table:
diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index b7a82502857a..9d22131afc1e 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -582,10 +582,8 @@ static int register_pm_qos_misc(struct pm_qos_object *qos, struct dentry *d)
qos->pm_qos_power_miscdev.name = qos->name;
qos->pm_qos_power_miscdev.fops = &pm_qos_power_fops;
- if (d) {
- (void)debugfs_create_file(qos->name, S_IRUGO, d,
- (void *)qos, &pm_qos_debug_fops);
- }
+ debugfs_create_file(qos->name, S_IRUGO, d, (void *)qos,
+ &pm_qos_debug_fops);
return misc_register(&qos->pm_qos_power_miscdev);
}
@@ -685,8 +683,6 @@ static int __init pm_qos_power_init(void)
BUILD_BUG_ON(ARRAY_SIZE(pm_qos_array) != PM_QOS_NUM_CLASSES);
d = debugfs_create_dir("pm_qos", NULL);
- if (IS_ERR_OR_NULL(d))
- d = NULL;
for (i = PM_QOS_CPU_DMA_LATENCY; i < PM_QOS_NUM_CLASSES; i++) {
ret = register_pm_qos_misc(pm_qos_array[i], d);
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 640b2034edd6..4802b039b89f 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1215,14 +1215,16 @@ static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
if (!pfn_valid(pfn))
return NULL;
- page = pfn_to_page(pfn);
- if (page_zone(page) != zone)
+ page = pfn_to_online_page(pfn);
+ if (!page || page_zone(page) != zone)
return NULL;
BUG_ON(!PageHighMem(page));
- if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page) ||
- PageReserved(page))
+ if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page))
+ return NULL;
+
+ if (PageReserved(page) || PageOffline(page))
return NULL;
if (page_is_guard(page))
@@ -1277,8 +1279,8 @@ static struct page *saveable_page(struct zone *zone, unsigned long pfn)
if (!pfn_valid(pfn))
return NULL;
- page = pfn_to_page(pfn);
- if (page_zone(page) != zone)
+ page = pfn_to_online_page(pfn);
+ if (!page || page_zone(page) != zone)
return NULL;
BUG_ON(PageHighMem(page));
@@ -1286,6 +1288,9 @@ static struct page *saveable_page(struct zone *zone, unsigned long pfn)
if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page))
return NULL;
+ if (PageOffline(page))
+ return NULL;
+
if (PageReserved(page)
&& (!kernel_page_present(page) || pfn_is_nosave(pfn)))
return NULL;
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index d3d170374ceb..8eee85bb2687 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -344,7 +344,6 @@ static int console_msg_format = MSG_FORMAT_DEFAULT;
enum log_flags {
LOG_NEWLINE = 2, /* text ended with a newline */
- LOG_PREFIX = 4, /* text started with a prefix */
LOG_CONT = 8, /* text is a fragment of a continuation line */
};
@@ -356,6 +355,9 @@ struct printk_log {
u8 facility; /* syslog facility */
u8 flags:5; /* internal record flags */
u8 level:3; /* syslog level */
+#ifdef CONFIG_PRINTK_CALLER
+ u32 caller_id; /* thread id or processor id */
+#endif
}
#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
__packed __aligned(4)
@@ -422,7 +424,11 @@ static u64 exclusive_console_stop_seq;
static u64 clear_seq;
static u32 clear_idx;
+#ifdef CONFIG_PRINTK_CALLER
+#define PREFIX_MAX 48
+#else
#define PREFIX_MAX 32
+#endif
#define LOG_LINE_MAX (1024 - PREFIX_MAX)
#define LOG_LEVEL(v) ((v) & 0x07)
@@ -577,7 +583,7 @@ static u32 truncate_msg(u16 *text_len, u16 *trunc_msg_len,
}
/* insert record into the buffer, discard old ones, update heads */
-static int log_store(int facility, int level,
+static int log_store(u32 caller_id, int facility, int level,
enum log_flags flags, u64 ts_nsec,
const char *dict, u16 dict_len,
const char *text, u16 text_len)
@@ -625,6 +631,9 @@ static int log_store(int facility, int level,
msg->ts_nsec = ts_nsec;
else
msg->ts_nsec = local_clock();
+#ifdef CONFIG_PRINTK_CALLER
+ msg->caller_id = caller_id;
+#endif
memset(log_dict(msg) + dict_len, 0, pad_len);
msg->len = size;
@@ -688,12 +697,21 @@ static ssize_t msg_print_ext_header(char *buf, size_t size,
struct printk_log *msg, u64 seq)
{
u64 ts_usec = msg->ts_nsec;
+ char caller[20];
+#ifdef CONFIG_PRINTK_CALLER
+ u32 id = msg->caller_id;
+
+ snprintf(caller, sizeof(caller), ",caller=%c%u",
+ id & 0x80000000 ? 'C' : 'T', id & ~0x80000000);
+#else
+ caller[0] = '\0';
+#endif
do_div(ts_usec, 1000);
- return scnprintf(buf, size, "%u,%llu,%llu,%c;",
- (msg->facility << 3) | msg->level, seq, ts_usec,
- msg->flags & LOG_CONT ? 'c' : '-');
+ return scnprintf(buf, size, "%u,%llu,%llu,%c%s;",
+ (msg->facility << 3) | msg->level, seq, ts_usec,
+ msg->flags & LOG_CONT ? 'c' : '-', caller);
}
static ssize_t msg_print_ext_body(char *buf, size_t size,
@@ -1038,6 +1056,9 @@ void log_buf_vmcoreinfo_setup(void)
VMCOREINFO_OFFSET(printk_log, len);
VMCOREINFO_OFFSET(printk_log, text_len);
VMCOREINFO_OFFSET(printk_log, dict_len);
+#ifdef CONFIG_PRINTK_CALLER
+ VMCOREINFO_OFFSET(printk_log, caller_id);
+#endif
}
#endif
@@ -1236,10 +1257,23 @@ static size_t print_time(u64 ts, char *buf)
{
unsigned long rem_nsec = do_div(ts, 1000000000);
- return sprintf(buf, "[%5lu.%06lu] ",
+ return sprintf(buf, "[%5lu.%06lu]",
(unsigned long)ts, rem_nsec / 1000);
}
+#ifdef CONFIG_PRINTK_CALLER
+static size_t print_caller(u32 id, char *buf)
+{
+ char caller[12];
+
+ snprintf(caller, sizeof(caller), "%c%u",
+ id & 0x80000000 ? 'C' : 'T', id & ~0x80000000);
+ return sprintf(buf, "[%6s]", caller);
+}
+#else
+#define print_caller(id, buf) 0
+#endif
+
static size_t print_prefix(const struct printk_log *msg, bool syslog,
bool time, char *buf)
{
@@ -1247,8 +1281,17 @@ static size_t print_prefix(const struct printk_log *msg, bool syslog,
if (syslog)
len = print_syslog((msg->facility << 3) | msg->level, buf);
+
if (time)
len += print_time(msg->ts_nsec, buf + len);
+
+ len += print_caller(msg->caller_id, buf + len);
+
+ if (IS_ENABLED(CONFIG_PRINTK_CALLER) || time) {
+ buf[len++] = ' ';
+ buf[len] = '\0';
+ }
+
return len;
}
@@ -1752,6 +1795,12 @@ static inline void printk_delay(void)
}
}
+static inline u32 printk_caller_id(void)
+{
+ return in_task() ? task_pid_nr(current) :
+ 0x80000000 + raw_smp_processor_id();
+}
+
/*
* Continuation lines are buffered, and not committed to the record buffer
* until the line is complete, or a race forces it. The line fragments
@@ -1761,7 +1810,7 @@ static inline void printk_delay(void)
static struct cont {
char buf[LOG_LINE_MAX];
size_t len; /* length == 0 means unused buffer */
- struct task_struct *owner; /* task of first print*/
+ u32 caller_id; /* printk_caller_id() of first print */
u64 ts_nsec; /* time of first print */
u8 level; /* log level of first message */
u8 facility; /* log facility of first message */
@@ -1773,12 +1822,13 @@ static void cont_flush(void)
if (cont.len == 0)
return;
- log_store(cont.facility, cont.level, cont.flags, cont.ts_nsec,
- NULL, 0, cont.buf, cont.len);
+ log_store(cont.caller_id, cont.facility, cont.level, cont.flags,
+ cont.ts_nsec, NULL, 0, cont.buf, cont.len);
cont.len = 0;
}
-static bool cont_add(int facility, int level, enum log_flags flags, const char *text, size_t len)
+static bool cont_add(u32 caller_id, int facility, int level,
+ enum log_flags flags, const char *text, size_t len)
{
/* If the line gets too long, split it up in separate records. */
if (cont.len + len > sizeof(cont.buf)) {
@@ -1789,7 +1839,7 @@ static bool cont_add(int facility, int level, enum log_flags flags, const char *
if (!cont.len) {
cont.facility = facility;
cont.level = level;
- cont.owner = current;
+ cont.caller_id = caller_id;
cont.ts_nsec = local_clock();
cont.flags = flags;
}
@@ -1809,13 +1859,15 @@ static bool cont_add(int facility, int level, enum log_flags flags, const char *
static size_t log_output(int facility, int level, enum log_flags lflags, const char *dict, size_t dictlen, char *text, size_t text_len)
{
+ const u32 caller_id = printk_caller_id();
+
/*
* If an earlier line was buffered, and we're a continuation
- * write from the same process, try to add it to the buffer.
+ * write from the same context, try to add it to the buffer.
*/
if (cont.len) {
- if (cont.owner == current && (lflags & LOG_CONT)) {
- if (cont_add(facility, level, lflags, text, text_len))
+ if (cont.caller_id == caller_id && (lflags & LOG_CONT)) {
+ if (cont_add(caller_id, facility, level, lflags, text, text_len))
return text_len;
}
/* Otherwise, make sure it's flushed */
@@ -1828,12 +1880,13 @@ static size_t log_output(int facility, int level, enum log_flags lflags, const c
/* If it doesn't end in a newline, try to buffer the current line */
if (!(lflags & LOG_NEWLINE)) {
- if (cont_add(facility, level, lflags, text, text_len))
+ if (cont_add(caller_id, facility, level, lflags, text, text_len))
return text_len;
}
/* Store it in the record log */
- return log_store(facility, level, lflags, 0, dict, dictlen, text, text_len);
+ return log_store(caller_id, facility, level, lflags, 0,
+ dict, dictlen, text, text_len);
}
/* Must be called under logbuf_lock. */
@@ -1867,9 +1920,6 @@ int vprintk_store(int facility, int level,
case '0' ... '7':
if (level == LOGLEVEL_DEFAULT)
level = kern_level - '0';
- /* fallthrough */
- case 'd': /* KERN_DEFAULT */
- lflags |= LOG_PREFIX;
break;
case 'c': /* KERN_CONT */
lflags |= LOG_CONT;
@@ -1884,7 +1934,7 @@ int vprintk_store(int facility, int level,
level = default_message_loglevel;
if (dict)
- lflags |= LOG_PREFIX|LOG_NEWLINE;
+ lflags |= LOG_NEWLINE;
return log_output(facility, level, lflags,
dict, dictlen, text, text_len);
diff --git a/kernel/resource.c b/kernel/resource.c
index 915c02e8e5dd..e81b17b53fa5 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -448,8 +448,6 @@ int walk_mem_res(u64 start, u64 end, void *arg,
arg, func);
}
-#if !defined(CONFIG_ARCH_HAS_WALK_MEMORY)
-
/*
* This function calls the @func callback against all memory ranges of type
* System RAM which are marked as IORESOURCE_SYSTEM_RAM and IORESOUCE_BUSY.
@@ -481,8 +479,6 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
return ret;
}
-#endif
-
static int __is_ram(unsigned long pfn, unsigned long nr_pages, void *arg)
{
return 1;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f3901b84d217..ead464a0f2e5 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2220,6 +2220,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
INIT_HLIST_HEAD(&p->preempt_notifiers);
#endif
+#ifdef CONFIG_COMPACTION
+ p->capture_control = NULL;
+#endif
init_numa_balancing(clone_flags, p);
}
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 8213ff6e365d..ea74d43924b2 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1173,7 +1173,7 @@ void init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
/* New address space, reset the preferred nid */
if (!(clone_flags & CLONE_VM)) {
- p->numa_preferred_nid = -1;
+ p->numa_preferred_nid = NUMA_NO_NODE;
return;
}
@@ -1193,13 +1193,13 @@ void init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
static void account_numa_enqueue(struct rq *rq, struct task_struct *p)
{
- rq->nr_numa_running += (p->numa_preferred_nid != -1);
+ rq->nr_numa_running += (p->numa_preferred_nid != NUMA_NO_NODE);
rq->nr_preferred_running += (p->numa_preferred_nid == task_node(p));
}
static void account_numa_dequeue(struct rq *rq, struct task_struct *p)
{
- rq->nr_numa_running -= (p->numa_preferred_nid != -1);
+ rq->nr_numa_running -= (p->numa_preferred_nid != NUMA_NO_NODE);
rq->nr_preferred_running -= (p->numa_preferred_nid == task_node(p));
}
@@ -1413,7 +1413,7 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
* two full passes of the "multi-stage node selection" test that is
* executed below.
*/
- if ((p->numa_preferred_nid == -1 || p->numa_scan_seq <= 4) &&
+ if ((p->numa_preferred_nid == NUMA_NO_NODE || p->numa_scan_seq <= 4) &&
(cpupid_pid_unset(last_cpupid) || cpupid_match_pid(p, last_cpupid)))
return true;
@@ -1861,7 +1861,7 @@ static void numa_migrate_preferred(struct task_struct *p)
unsigned long interval = HZ;
/* This task has no NUMA fault statistics yet */
- if (unlikely(p->numa_preferred_nid == -1 || !p->numa_faults))
+ if (unlikely(p->numa_preferred_nid == NUMA_NO_NODE || !p->numa_faults))
return;
/* Periodically retry migrating the task to the preferred node */
@@ -2108,7 +2108,7 @@ static int preferred_group_nid(struct task_struct *p, int nid)
static void task_numa_placement(struct task_struct *p)
{
- int seq, nid, max_nid = -1;
+ int seq, nid, max_nid = NUMA_NO_NODE;
unsigned long max_faults = 0;
unsigned long fault_types[2] = { 0, 0 };
unsigned long total_faults;
@@ -2651,7 +2651,8 @@ static void update_scan_period(struct task_struct *p, int new_cpu)
* the preferred node.
*/
if (dst_nid == p->numa_preferred_nid ||
- (p->numa_preferred_nid != -1 && src_nid != p->numa_preferred_nid))
+ (p->numa_preferred_nid != NUMA_NO_NODE &&
+ src_nid != p->numa_preferred_nid))
return;
}
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index a43c601ac252..54a0347ca812 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -445,8 +445,8 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
* behavior of privileged children.
*/
if (!task_no_new_privs(current) &&
- security_capable_noaudit(current_cred(), current_user_ns(),
- CAP_SYS_ADMIN) != 0)
+ security_capable(current_cred(), current_user_ns(),
+ CAP_SYS_ADMIN, CAP_OPT_NOAUDIT) != 0)
return ERR_PTR(-EACCES);
/* Allocate a new seccomp_filter */
diff --git a/kernel/sys.c b/kernel/sys.c
index f7eb62eceb24..12df0e5434b8 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -516,7 +516,7 @@ long __sys_setreuid(uid_t ruid, uid_t euid)
new->uid = kruid;
if (!uid_eq(old->uid, kruid) &&
!uid_eq(old->euid, kruid) &&
- !ns_capable(old->user_ns, CAP_SETUID))
+ !ns_capable_setid(old->user_ns, CAP_SETUID))
goto error;
}
@@ -525,7 +525,7 @@ long __sys_setreuid(uid_t ruid, uid_t euid)
if (!uid_eq(old->uid, keuid) &&
!uid_eq(old->euid, keuid) &&
!uid_eq(old->suid, keuid) &&
- !ns_capable(old->user_ns, CAP_SETUID))
+ !ns_capable_setid(old->user_ns, CAP_SETUID))
goto error;
}
@@ -584,7 +584,7 @@ long __sys_setuid(uid_t uid)
old = current_cred();
retval = -EPERM;
- if (ns_capable(old->user_ns, CAP_SETUID)) {
+ if (ns_capable_setid(old->user_ns, CAP_SETUID)) {
new->suid = new->uid = kuid;
if (!uid_eq(kuid, old->uid)) {
retval = set_user(new);
@@ -646,7 +646,7 @@ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
old = current_cred();
retval = -EPERM;
- if (!ns_capable(old->user_ns, CAP_SETUID)) {
+ if (!ns_capable_setid(old->user_ns, CAP_SETUID)) {
if (ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) &&
!uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid))
goto error;
@@ -814,7 +814,7 @@ long __sys_setfsuid(uid_t uid)
if (uid_eq(kuid, old->uid) || uid_eq(kuid, old->euid) ||
uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) ||
- ns_capable(old->user_ns, CAP_SETUID)) {
+ ns_capable_setid(old->user_ns, CAP_SETUID)) {
if (!uid_eq(kuid, old->fsuid)) {
new->fsuid = kuid;
if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0)
@@ -1747,6 +1747,7 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
if (who == RUSAGE_CHILDREN)
break;
+ /* fall through */
case RUSAGE_SELF:
thread_group_cputime_adjusted(p, &tgutime, &tgstime);
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 85e5ccec0955..51d7c6794bf1 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -48,6 +48,9 @@ COND_SYSCALL(io_pgetevents_time32);
COND_SYSCALL(io_pgetevents);
COND_SYSCALL_COMPAT(io_pgetevents_time32);
COND_SYSCALL_COMPAT(io_pgetevents);
+COND_SYSCALL(io_uring_setup);
+COND_SYSCALL(io_uring_enter);
+COND_SYSCALL(io_uring_register);
/* fs/xattr.c */
@@ -202,6 +205,7 @@ COND_SYSCALL(msgget);
COND_SYSCALL(old_msgctl);
COND_SYSCALL(msgctl);
COND_SYSCALL_COMPAT(msgctl);
+COND_SYSCALL_COMPAT(old_msgctl);
COND_SYSCALL(msgrcv);
COND_SYSCALL_COMPAT(msgrcv);
COND_SYSCALL(msgsnd);
@@ -212,6 +216,7 @@ COND_SYSCALL(semget);
COND_SYSCALL(old_semctl);
COND_SYSCALL(semctl);
COND_SYSCALL_COMPAT(semctl);
+COND_SYSCALL_COMPAT(old_semctl);
COND_SYSCALL(semtimedop);
COND_SYSCALL(semtimedop_time32);
COND_SYSCALL(semop);
@@ -221,6 +226,7 @@ COND_SYSCALL(shmget);
COND_SYSCALL(old_shmctl);
COND_SYSCALL(shmctl);
COND_SYSCALL_COMPAT(shmctl);
+COND_SYSCALL_COMPAT(old_shmctl);
COND_SYSCALL(shmat);
COND_SYSCALL_COMPAT(shmat);
COND_SYSCALL(shmdt);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 7c2b9bc88ee8..3fb1405f3f8c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -67,6 +67,8 @@
#include <linux/bpf.h>
#include <linux/mount.h>
+#include "../lib/kstrtox.h"
+
#include <linux/uaccess.h>
#include <asm/processor.h>
@@ -127,6 +129,7 @@ static int __maybe_unused one = 1;
static int __maybe_unused two = 2;
static int __maybe_unused four = 4;
static unsigned long one_ul = 1;
+static unsigned long long_max = LONG_MAX;
static int one_hundred = 100;
static int one_thousand = 1000;
#ifdef CONFIG_PRINTK
@@ -1471,7 +1474,7 @@ static struct ctl_table vm_table[] = {
.data = &sysctl_extfrag_threshold,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = sysctl_extfrag_handler,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &min_extfrag_threshold,
.extra2 = &max_extfrag_threshold,
},
@@ -1747,6 +1750,8 @@ static struct ctl_table fs_table[] = {
.maxlen = sizeof(files_stat.max_files),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &long_max,
},
{
.procname = "nr_open",
@@ -2117,6 +2122,41 @@ static void proc_skip_char(char **buf, size_t *size, const char v)
}
}
+/**
+ * strtoul_lenient - parse an ASCII formatted integer from a buffer and only
+ * fail on overflow
+ *
+ * @cp: kernel buffer containing the string to parse
+ * @endp: pointer to store the trailing characters
+ * @base: the base to use
+ * @res: where the parsed integer will be stored
+ *
+ * In case of success 0 is returned and @res will contain the parsed integer,
+ * @endp will hold any trailing characters.
+ * This function will fail the parse on overflow. If there wasn't an overflow
+ * the function will defer the decision what characters count as invalid to the
+ * caller.
+ */
+static int strtoul_lenient(const char *cp, char **endp, unsigned int base,
+ unsigned long *res)
+{
+ unsigned long long result;
+ unsigned int rv;
+
+ cp = _parse_integer_fixup_radix(cp, &base);
+ rv = _parse_integer(cp, base, &result);
+ if ((rv & KSTRTOX_OVERFLOW) || (result != (unsigned long)result))
+ return -ERANGE;
+
+ cp += rv;
+
+ if (endp)
+ *endp = (char *)cp;
+
+ *res = (unsigned long)result;
+ return 0;
+}
+
#define TMPBUFLEN 22
/**
* proc_get_long - reads an ASCII formatted integer from a user buffer
@@ -2160,7 +2200,8 @@ static int proc_get_long(char **buf, size_t *size,
if (!isdigit(*p))
return -EINVAL;
- *val = simple_strtoul(p, &p, 0);
+ if (strtoul_lenient(p, &p, 0, val))
+ return -EINVAL;
len = p - tmp;
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 27821480105e..217ef481fbbb 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -1301,7 +1301,7 @@ static int parse_pred(const char *str, void *data,
/* go past the last quote */
i++;
- } else if (isdigit(str[i])) {
+ } else if (isdigit(str[i]) || str[i] == '-') {
/* Make sure the field is not a string */
if (is_string_field(field)) {
@@ -1314,6 +1314,9 @@ static int parse_pred(const char *str, void *data,
goto err_free;
}
+ if (str[i] == '-')
+ i++;
+
/* We allow 0xDEADBEEF */
while (isalnum(str[i]))
i++;
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 9eaf07f99212..99592c27465e 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -865,7 +865,7 @@ fetch_store_strlen(unsigned long addr)
u8 c;
do {
- ret = probe_mem_read(&c, (u8 *)addr + len, 1);
+ ret = probe_kernel_read(&c, (u8 *)addr + len, 1);
len++;
} while (c && ret == 0 && len < MAX_STRING_SIZE);
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 977918d5d350..8fbfda94a67b 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -199,6 +199,13 @@ static int __init nosoftlockup_setup(char *str)
}
__setup("nosoftlockup", nosoftlockup_setup);
+static int __init watchdog_thresh_setup(char *str)
+{
+ get_option(&str, &watchdog_thresh);
+ return 1;
+}
+__setup("watchdog_thresh=", watchdog_thresh_setup);
+
#ifdef CONFIG_SMP
int __read_mostly sysctl_softlockup_all_cpu_backtrace;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index e14da4f599ab..4026d1871407 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -648,7 +648,7 @@ static void set_work_pool_and_clear_pending(struct work_struct *work,
* The following mb guarantees that previous clear of a PENDING bit
* will not be reordered with any speculative LOADS or STORES from
* work->current_func, which is executed afterwards. This possible
- * reordering can lead to a missed execution on attempt to qeueue
+ * reordering can lead to a missed execution on attempt to queue
* the same @work. E.g. consider this case:
*
* CPU#0 CPU#1
@@ -920,6 +920,16 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task)
* CONTEXT:
* spin_lock_irq(rq->lock)
*
+ * This function is called during schedule() when a kworker is going
+ * to sleep. It's used by psi to identify aggregation workers during
+ * dequeuing, to allow periodic aggregation to shut-off when that
+ * worker is the last task in the system or cgroup to go to sleep.
+ *
+ * As this function doesn't involve any workqueue-related locking, it
+ * only returns stable values when called from inside the scheduler's
+ * queuing and dequeuing paths, when @task, which must be a kworker,
+ * is guaranteed to not be processing any works.
+ *
* Return:
* The last work function %current executed as a worker, NULL if it
* hasn't executed any work yet.
@@ -1343,7 +1353,7 @@ static bool is_chained_work(struct workqueue_struct *wq)
worker = current_wq_worker();
/*
- * Return %true iff I'm a worker execuing a work item on @wq. If
+ * Return %true iff I'm a worker executing a work item on @wq. If
* I'm @worker, it's safe to dereference it without locking.
*/
return worker && worker->current_pwq->wq == wq;
@@ -1514,6 +1524,90 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq,
}
EXPORT_SYMBOL(queue_work_on);
+/**
+ * workqueue_select_cpu_near - Select a CPU based on NUMA node
+ * @node: NUMA node ID that we want to select a CPU from
+ *
+ * This function will attempt to find a "random" cpu available on a given
+ * node. If there are no CPUs available on the given node it will return
+ * WORK_CPU_UNBOUND indicating that we should just schedule to any
+ * available CPU if we need to schedule this work.
+ */
+static int workqueue_select_cpu_near(int node)
+{
+ int cpu;
+
+ /* No point in doing this if NUMA isn't enabled for workqueues */
+ if (!wq_numa_enabled)
+ return WORK_CPU_UNBOUND;
+
+ /* Delay binding to CPU if node is not valid or online */
+ if (node < 0 || node >= MAX_NUMNODES || !node_online(node))
+ return WORK_CPU_UNBOUND;
+
+ /* Use local node/cpu if we are already there */
+ cpu = raw_smp_processor_id();
+ if (node == cpu_to_node(cpu))
+ return cpu;
+
+ /* Use "random" otherwise know as "first" online CPU of node */
+ cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask);
+
+ /* If CPU is valid return that, otherwise just defer */
+ return cpu < nr_cpu_ids ? cpu : WORK_CPU_UNBOUND;
+}
+
+/**
+ * queue_work_node - queue work on a "random" cpu for a given NUMA node
+ * @node: NUMA node that we are targeting the work for
+ * @wq: workqueue to use
+ * @work: work to queue
+ *
+ * We queue the work to a "random" CPU within a given NUMA node. The basic
+ * idea here is to provide a way to somehow associate work with a given
+ * NUMA node.
+ *
+ * This function will only make a best effort attempt at getting this onto
+ * the right NUMA node. If no node is requested or the requested node is
+ * offline then we just fall back to standard queue_work behavior.
+ *
+ * Currently the "random" CPU ends up being the first available CPU in the
+ * intersection of cpu_online_mask and the cpumask of the node, unless we
+ * are running on the node. In that case we just use the current CPU.
+ *
+ * Return: %false if @work was already on a queue, %true otherwise.
+ */
+bool queue_work_node(int node, struct workqueue_struct *wq,
+ struct work_struct *work)
+{
+ unsigned long flags;
+ bool ret = false;
+
+ /*
+ * This current implementation is specific to unbound workqueues.
+ * Specifically we only return the first available CPU for a given
+ * node instead of cycling through individual CPUs within the node.
+ *
+ * If this is used with a per-cpu workqueue then the logic in
+ * workqueue_select_cpu_near would need to be updated to allow for
+ * some round robin type logic.
+ */
+ WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND));
+
+ local_irq_save(flags);
+
+ if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
+ int cpu = workqueue_select_cpu_near(node);
+
+ __queue_work(cpu, wq, work);
+ ret = true;
+ }
+
+ local_irq_restore(flags);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(queue_work_node);
+
void delayed_work_timer_fn(struct timer_list *t)
{
struct delayed_work *dwork = from_timer(dwork, t, timer);
@@ -1641,7 +1735,7 @@ static void rcu_work_rcufn(struct rcu_head *rcu)
*
* Return: %false if @rwork was already pending, %true otherwise. Note
* that a full RCU grace period is guaranteed only after a %true return.
- * While @rwork is guarnateed to be executed after a %false return, the
+ * While @rwork is guaranteed to be executed after a %false return, the
* execution may happen before a full RCU grace period has passed.
*/
bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork)
@@ -2933,6 +3027,9 @@ static bool __flush_work(struct work_struct *work, bool from_cancel)
if (WARN_ON(!wq_online))
return false;
+ if (WARN_ON(!work->func))
+ return false;
+
if (!from_cancel) {
lock_map_acquire(&work->lockdep_map);
lock_map_release(&work->lockdep_map);