Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile                        101
-rw-r--r--  kernel/audit.c                           2
-rw-r--r--  kernel/audit.h                          18
-rw-r--r--  kernel/audit_fsnotify.c                216
-rw-r--r--  kernel/audit_tree.c                      2
-rw-r--r--  kernel/audit_watch.c                    56
-rw-r--r--  kernel/auditfilter.c                    83
-rw-r--r--  kernel/auditsc.c                         9
-rw-r--r--  kernel/memremap.c                      190
-rw-r--r--  kernel/module_signing.c                213
-rw-r--r--  kernel/ptrace.c                         13
-rw-r--r--  kernel/resource.c                       61
-rw-r--r--  kernel/seccomp.c                        17
-rw-r--r--  kernel/system_certificates.S            20
-rw-r--r--  kernel/system_keyring.c                106
-rw-r--r--  kernel/trace/ftrace.c                    9
-rw-r--r--  kernel/trace/ring_buffer.c             764
-rw-r--r--  kernel/trace/trace.c                     4
-rw-r--r--  kernel/trace/trace_events.c             25
-rw-r--r--  kernel/trace/trace_events_filter.c      54
-rw-r--r--  kernel/trace/trace_functions_graph.c     4
-rw-r--r--  kernel/trace/trace_output.c              4
-rw-r--r--  kernel/trace/trace_stack.c              68
23 files changed, 1140 insertions, 899 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 718fb8afab7a..e0d7587e7684 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -45,7 +45,6 @@ ifneq ($(CONFIG_SMP),y)
obj-y += up.o
endif
obj-$(CONFIG_UID16) += uid16.o
-obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_MODULE_SIG) += module_signing.o
obj-$(CONFIG_KALLSYMS) += kallsyms.o
@@ -65,7 +64,7 @@ obj-$(CONFIG_SMP) += stop_machine.o
obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o
obj-$(CONFIG_AUDIT) += audit.o auditfilter.o
obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
-obj-$(CONFIG_AUDIT_WATCH) += audit_watch.o
+obj-$(CONFIG_AUDIT_WATCH) += audit_watch.o audit_fsnotify.o
obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
obj-$(CONFIG_GCOV_KERNEL) += gcov/
obj-$(CONFIG_KPROBES) += kprobes.o
@@ -100,6 +99,8 @@ obj-$(CONFIG_JUMP_LABEL) += jump_label.o
obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o
obj-$(CONFIG_TORTURE_TEST) += torture.o
+obj-$(CONFIG_HAS_IOMEM) += memremap.o
+
$(obj)/configs.o: $(obj)/config_data.h
# config_data.h contains the same information as ikconfig.h but gzipped.
@@ -112,99 +113,3 @@ $(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE
targets += config_data.h
$(obj)/config_data.h: $(obj)/config_data.gz FORCE
$(call filechk,ikconfiggz)
-
-###############################################################################
-#
-# Roll all the X.509 certificates that we can find together and pull them into
-# the kernel so that they get loaded into the system trusted keyring during
-# boot.
-#
-# We look in the source root and the build root for all files whose name ends
-# in ".x509". Unfortunately, this will generate duplicate filenames, so we
-# have make canonicalise the pathnames and then sort them to discard the
-# duplicates.
-#
-###############################################################################
-ifeq ($(CONFIG_SYSTEM_TRUSTED_KEYRING),y)
-X509_CERTIFICATES-y := $(wildcard *.x509) $(wildcard $(srctree)/*.x509)
-X509_CERTIFICATES-$(CONFIG_MODULE_SIG) += $(objtree)/signing_key.x509
-X509_CERTIFICATES-raw := $(sort $(foreach CERT,$(X509_CERTIFICATES-y), \
- $(or $(realpath $(CERT)),$(CERT))))
-X509_CERTIFICATES := $(subst $(realpath $(objtree))/,,$(X509_CERTIFICATES-raw))
-
-ifeq ($(X509_CERTIFICATES),)
-$(warning *** No X.509 certificates found ***)
-endif
-
-ifneq ($(wildcard $(obj)/.x509.list),)
-ifneq ($(shell cat $(obj)/.x509.list),$(X509_CERTIFICATES))
-$(warning X.509 certificate list changed to "$(X509_CERTIFICATES)" from "$(shell cat $(obj)/.x509.list)")
-$(shell rm $(obj)/.x509.list)
-endif
-endif
-
-kernel/system_certificates.o: $(obj)/x509_certificate_list
-
-quiet_cmd_x509certs = CERTS $@
- cmd_x509certs = cat $(X509_CERTIFICATES) /dev/null >$@ $(foreach X509,$(X509_CERTIFICATES),; $(kecho) " - Including cert $(X509)")
-
-targets += $(obj)/x509_certificate_list
-$(obj)/x509_certificate_list: $(X509_CERTIFICATES) $(obj)/.x509.list
- $(call if_changed,x509certs)
-
-targets += $(obj)/.x509.list
-$(obj)/.x509.list:
- @echo $(X509_CERTIFICATES) >$@
-endif
-
-clean-files := x509_certificate_list .x509.list
-
-ifeq ($(CONFIG_MODULE_SIG),y)
-###############################################################################
-#
-# If module signing is requested, say by allyesconfig, but a key has not been
-# supplied, then one will need to be generated to make sure the build does not
-# fail and that the kernel may be used afterwards.
-#
-###############################################################################
-ifndef CONFIG_MODULE_SIG_HASH
-$(error Could not determine digest type to use from kernel config)
-endif
-
-signing_key.priv signing_key.x509: x509.genkey
- @echo "###"
- @echo "### Now generating an X.509 key pair to be used for signing modules."
- @echo "###"
- @echo "### If this takes a long time, you might wish to run rngd in the"
- @echo "### background to keep the supply of entropy topped up. It"
- @echo "### needs to be run as root, and uses a hardware random"
- @echo "### number generator if one is available."
- @echo "###"
- openssl req -new -nodes -utf8 -$(CONFIG_MODULE_SIG_HASH) -days 36500 \
- -batch -x509 -config x509.genkey \
- -outform DER -out signing_key.x509 \
- -keyout signing_key.priv 2>&1
- @echo "###"
- @echo "### Key pair generated."
- @echo "###"
-
-x509.genkey:
- @echo Generating X.509 key generation config
- @echo >x509.genkey "[ req ]"
- @echo >>x509.genkey "default_bits = 4096"
- @echo >>x509.genkey "distinguished_name = req_distinguished_name"
- @echo >>x509.genkey "prompt = no"
- @echo >>x509.genkey "string_mask = utf8only"
- @echo >>x509.genkey "x509_extensions = myexts"
- @echo >>x509.genkey
- @echo >>x509.genkey "[ req_distinguished_name ]"
- @echo >>x509.genkey "#O = Unspecified company"
- @echo >>x509.genkey "CN = Build time autogenerated kernel key"
- @echo >>x509.genkey "#emailAddress = unspecified.user@unspecified.company"
- @echo >>x509.genkey
- @echo >>x509.genkey "[ myexts ]"
- @echo >>x509.genkey "basicConstraints=critical,CA:FALSE"
- @echo >>x509.genkey "keyUsage=digitalSignature"
- @echo >>x509.genkey "subjectKeyIdentifier=hash"
- @echo >>x509.genkey "authorityKeyIdentifier=keyid"
-endif
diff --git a/kernel/audit.c b/kernel/audit.c
index f9e6065346db..662c007635fb 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1761,7 +1761,7 @@ void audit_log_name(struct audit_context *context, struct audit_names *n,
} else
audit_log_format(ab, " name=(null)");
- if (n->ino != (unsigned long)-1)
+ if (n->ino != AUDIT_INO_UNSET)
audit_log_format(ab, " inode=%lu"
" dev=%02x:%02x mode=%#ho"
" ouid=%u ogid=%u rdev=%02x:%02x",
diff --git a/kernel/audit.h b/kernel/audit.h
index d641f9bb3ed0..dadf86a0e59e 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -50,6 +50,7 @@ enum audit_state {
/* Rule lists */
struct audit_watch;
+struct audit_fsnotify_mark;
struct audit_tree;
struct audit_chunk;
@@ -252,6 +253,7 @@ struct audit_net {
extern int selinux_audit_rule_update(void);
extern struct mutex audit_filter_mutex;
+extern int audit_del_rule(struct audit_entry *);
extern void audit_free_rule_rcu(struct rcu_head *);
extern struct list_head audit_filter_list[];
@@ -269,6 +271,15 @@ extern int audit_add_watch(struct audit_krule *krule, struct list_head **list);
extern void audit_remove_watch_rule(struct audit_krule *krule);
extern char *audit_watch_path(struct audit_watch *watch);
extern int audit_watch_compare(struct audit_watch *watch, unsigned long ino, dev_t dev);
+
+extern struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule, char *pathname, int len);
+extern char *audit_mark_path(struct audit_fsnotify_mark *mark);
+extern void audit_remove_mark(struct audit_fsnotify_mark *audit_mark);
+extern void audit_remove_mark_rule(struct audit_krule *krule);
+extern int audit_mark_compare(struct audit_fsnotify_mark *mark, unsigned long ino, dev_t dev);
+extern int audit_dupe_exe(struct audit_krule *new, struct audit_krule *old);
+extern int audit_exe_compare(struct task_struct *tsk, struct audit_fsnotify_mark *mark);
+
#else
#define audit_put_watch(w) {}
#define audit_get_watch(w) {}
@@ -278,6 +289,13 @@ extern int audit_watch_compare(struct audit_watch *watch, unsigned long ino, dev
#define audit_watch_path(w) ""
#define audit_watch_compare(w, i, d) 0
+#define audit_alloc_mark(k, p, l) (ERR_PTR(-EINVAL))
+#define audit_mark_path(m) ""
+#define audit_remove_mark(m)
+#define audit_remove_mark_rule(k)
+#define audit_mark_compare(m, i, d) 0
+#define audit_exe_compare(t, m) (-EINVAL)
+#define audit_dupe_exe(n, o) (-EINVAL)
#endif /* CONFIG_AUDIT_WATCH */
#ifdef CONFIG_AUDIT_TREE
diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c
new file mode 100644
index 000000000000..27c6046c2c3d
--- /dev/null
+++ b/kernel/audit_fsnotify.c
@@ -0,0 +1,216 @@
+/* audit_fsnotify.c -- tracking inodes
+ *
+ * Copyright 2003-2009,2014-2015 Red Hat, Inc.
+ * Copyright 2005 Hewlett-Packard Development Company, L.P.
+ * Copyright 2005 IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/audit.h>
+#include <linux/kthread.h>
+#include <linux/mutex.h>
+#include <linux/fs.h>
+#include <linux/fsnotify_backend.h>
+#include <linux/namei.h>
+#include <linux/netlink.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/security.h>
+#include "audit.h"
+
+/*
+ * This mark lives on the parent directory of the inode in question,
+ * but dev, ino, and path describe the child.
+ */
+struct audit_fsnotify_mark {
+ dev_t dev; /* associated superblock device */
+ unsigned long ino; /* associated inode number */
+ char *path; /* insertion path */
+ struct fsnotify_mark mark; /* fsnotify mark on the inode */
+ struct audit_krule *rule;
+};
+
+/* fsnotify handle. */
+static struct fsnotify_group *audit_fsnotify_group;
+
+/* fsnotify events we care about. */
+#define AUDIT_FS_EVENTS (FS_MOVE | FS_CREATE | FS_DELETE | FS_DELETE_SELF |\
+ FS_MOVE_SELF | FS_EVENT_ON_CHILD)
+
+static void audit_fsnotify_mark_free(struct audit_fsnotify_mark *audit_mark)
+{
+ kfree(audit_mark->path);
+ kfree(audit_mark);
+}
+
+static void audit_fsnotify_free_mark(struct fsnotify_mark *mark)
+{
+ struct audit_fsnotify_mark *audit_mark;
+
+ audit_mark = container_of(mark, struct audit_fsnotify_mark, mark);
+ audit_fsnotify_mark_free(audit_mark);
+}
+
+char *audit_mark_path(struct audit_fsnotify_mark *mark)
+{
+ return mark->path;
+}
+
+int audit_mark_compare(struct audit_fsnotify_mark *mark, unsigned long ino, dev_t dev)
+{
+ if (mark->ino == AUDIT_INO_UNSET)
+ return 0;
+ return (mark->ino == ino) && (mark->dev == dev);
+}
+
+static void audit_update_mark(struct audit_fsnotify_mark *audit_mark,
+ struct inode *inode)
+{
+ audit_mark->dev = inode ? inode->i_sb->s_dev : AUDIT_DEV_UNSET;
+ audit_mark->ino = inode ? inode->i_ino : AUDIT_INO_UNSET;
+}
+
+struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule, char *pathname, int len)
+{
+ struct audit_fsnotify_mark *audit_mark;
+ struct path path;
+ struct dentry *dentry;
+ struct inode *inode;
+ int ret;
+
+ if (pathname[0] != '/' || pathname[len-1] == '/')
+ return ERR_PTR(-EINVAL);
+
+ dentry = kern_path_locked(pathname, &path);
+ if (IS_ERR(dentry))
+ return (void *)dentry; /* returning an error */
+ inode = path.dentry->d_inode;
+ mutex_unlock(&inode->i_mutex);
+
+ audit_mark = kzalloc(sizeof(*audit_mark), GFP_KERNEL);
+ if (unlikely(!audit_mark)) {
+ audit_mark = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ fsnotify_init_mark(&audit_mark->mark, audit_fsnotify_free_mark);
+ audit_mark->mark.mask = AUDIT_FS_EVENTS;
+ audit_mark->path = pathname;
+ audit_update_mark(audit_mark, dentry->d_inode);
+ audit_mark->rule = krule;
+
+ ret = fsnotify_add_mark(&audit_mark->mark, audit_fsnotify_group, inode, NULL, true);
+ if (ret < 0) {
+ audit_fsnotify_mark_free(audit_mark);
+ audit_mark = ERR_PTR(ret);
+ }
+out:
+ dput(dentry);
+ path_put(&path);
+ return audit_mark;
+}
+
+static void audit_mark_log_rule_change(struct audit_fsnotify_mark *audit_mark, char *op)
+{
+ struct audit_buffer *ab;
+ struct audit_krule *rule = audit_mark->rule;
+
+ if (!audit_enabled)
+ return;
+ ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE);
+ if (unlikely(!ab))
+ return;
+ audit_log_format(ab, "auid=%u ses=%u op=",
+ from_kuid(&init_user_ns, audit_get_loginuid(current)),
+ audit_get_sessionid(current));
+ audit_log_string(ab, op);
+ audit_log_format(ab, " path=");
+ audit_log_untrustedstring(ab, audit_mark->path);
+ audit_log_key(ab, rule->filterkey);
+ audit_log_format(ab, " list=%d res=1", rule->listnr);
+ audit_log_end(ab);
+}
+
+void audit_remove_mark(struct audit_fsnotify_mark *audit_mark)
+{
+ fsnotify_destroy_mark(&audit_mark->mark, audit_fsnotify_group);
+ fsnotify_put_mark(&audit_mark->mark);
+}
+
+void audit_remove_mark_rule(struct audit_krule *krule)
+{
+ struct audit_fsnotify_mark *mark = krule->exe;
+
+ audit_remove_mark(mark);
+}
+
+static void audit_autoremove_mark_rule(struct audit_fsnotify_mark *audit_mark)
+{
+ struct audit_krule *rule = audit_mark->rule;
+ struct audit_entry *entry = container_of(rule, struct audit_entry, rule);
+
+ audit_mark_log_rule_change(audit_mark, "autoremove_rule");
+ audit_del_rule(entry);
+}
+
+/* Update mark data in audit rules based on fsnotify events. */
+static int audit_mark_handle_event(struct fsnotify_group *group,
+ struct inode *to_tell,
+ struct fsnotify_mark *inode_mark,
+ struct fsnotify_mark *vfsmount_mark,
+ u32 mask, void *data, int data_type,
+ const unsigned char *dname, u32 cookie)
+{
+ struct audit_fsnotify_mark *audit_mark;
+ struct inode *inode = NULL;
+
+ audit_mark = container_of(inode_mark, struct audit_fsnotify_mark, mark);
+
+ BUG_ON(group != audit_fsnotify_group);
+
+ switch (data_type) {
+ case (FSNOTIFY_EVENT_PATH):
+ inode = ((struct path *)data)->dentry->d_inode;
+ break;
+ case (FSNOTIFY_EVENT_INODE):
+ inode = (struct inode *)data;
+ break;
+ default:
+ BUG();
+ return 0;
+ };
+
+ if (mask & (FS_CREATE|FS_MOVED_TO|FS_DELETE|FS_MOVED_FROM)) {
+ if (audit_compare_dname_path(dname, audit_mark->path, AUDIT_NAME_FULL))
+ return 0;
+ audit_update_mark(audit_mark, inode);
+ } else if (mask & (FS_DELETE_SELF|FS_UNMOUNT|FS_MOVE_SELF))
+ audit_autoremove_mark_rule(audit_mark);
+
+ return 0;
+}
+
+static const struct fsnotify_ops audit_mark_fsnotify_ops = {
+ .handle_event = audit_mark_handle_event,
+};
+
+static int __init audit_fsnotify_init(void)
+{
+ audit_fsnotify_group = fsnotify_alloc_group(&audit_mark_fsnotify_ops);
+ if (IS_ERR(audit_fsnotify_group)) {
+ audit_fsnotify_group = NULL;
+ audit_panic("cannot create audit fsnotify group");
+ }
+ return 0;
+}
+device_initcall(audit_fsnotify_init);
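
The mark API introduced above is consumed by the rule-filter changes later in this patch. As a rough illustration of the intended call pattern, a caller holding a validated rule and pathname would attach a mark as sketched below (attach_exe_mark() is a hypothetical helper, not part of the patch; on success the mark takes ownership of pathname, on failure the caller still owns it, mirroring the auditfilter.c hunks further down):

/* Hedged sketch of the new API's expected usage (kernel context assumed). */
static int attach_exe_mark(struct audit_krule *krule, char *pathname)
{
        struct audit_fsnotify_mark *mark;

        mark = audit_alloc_mark(krule, pathname, strlen(pathname));
        if (IS_ERR(mark))
                return PTR_ERR(mark);   /* caller still frees pathname */

        krule->exe = mark;              /* torn down via audit_remove_mark() */
        return 0;
}
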
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index b0f9877273fc..94ecdabda8e6 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -479,6 +479,8 @@ static void kill_rules(struct audit_tree *tree)
if (rule->tree) {
/* not a half-baked one */
audit_tree_log_remove_rule(rule);
+ if (entry->rule.exe)
+ audit_remove_mark(entry->rule.exe);
rule->tree = NULL;
list_del_rcu(&entry->list);
list_del(&entry->rule.list);
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 6e30024d9aac..656c7e93ac0d 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -138,7 +138,7 @@ char *audit_watch_path(struct audit_watch *watch)
int audit_watch_compare(struct audit_watch *watch, unsigned long ino, dev_t dev)
{
- return (watch->ino != (unsigned long)-1) &&
+ return (watch->ino != AUDIT_INO_UNSET) &&
(watch->ino == ino) &&
(watch->dev == dev);
}
@@ -179,8 +179,8 @@ static struct audit_watch *audit_init_watch(char *path)
INIT_LIST_HEAD(&watch->rules);
atomic_set(&watch->count, 1);
watch->path = path;
- watch->dev = (dev_t)-1;
- watch->ino = (unsigned long)-1;
+ watch->dev = AUDIT_DEV_UNSET;
+ watch->ino = AUDIT_INO_UNSET;
return watch;
}
@@ -203,7 +203,6 @@ int audit_to_watch(struct audit_krule *krule, char *path, int len, u32 op)
if (IS_ERR(watch))
return PTR_ERR(watch);
- audit_get_watch(watch);
krule->watch = watch;
return 0;
@@ -313,6 +312,8 @@ static void audit_update_watch(struct audit_parent *parent,
list_replace(&oentry->rule.list,
&nentry->rule.list);
}
+ if (oentry->rule.exe)
+ audit_remove_mark(oentry->rule.exe);
audit_watch_log_rule_change(r, owatch, "updated_rules");
@@ -343,6 +344,8 @@ static void audit_remove_parent_watches(struct audit_parent *parent)
list_for_each_entry_safe(r, nextr, &w->rules, rlist) {
e = container_of(r, struct audit_entry, rule);
audit_watch_log_rule_change(r, w, "remove_rule");
+ if (e->rule.exe)
+ audit_remove_mark(e->rule.exe);
list_del(&r->rlist);
list_del(&r->list);
list_del_rcu(&e->list);
@@ -387,19 +390,20 @@ static void audit_add_to_parent(struct audit_krule *krule,
watch_found = 1;
- /* put krule's and initial refs to temporary watch */
- audit_put_watch(watch);
+ /* put krule's ref to temporary watch */
audit_put_watch(watch);
audit_get_watch(w);
krule->watch = watch = w;
+
+ audit_put_parent(parent);
break;
}
if (!watch_found) {
- audit_get_parent(parent);
watch->parent = parent;
+ audit_get_watch(watch);
list_add(&watch->wlist, &parent->watches);
}
list_add(&krule->rlist, &watch->rules);
@@ -437,9 +441,6 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
audit_add_to_parent(krule, parent);
- /* match get in audit_find_parent or audit_init_parent */
- audit_put_parent(parent);
-
h = audit_hash_ino((u32)watch->ino);
*list = &audit_inode_hash[h];
error:
@@ -496,7 +497,7 @@ static int audit_watch_handle_event(struct fsnotify_group *group,
if (mask & (FS_CREATE|FS_MOVED_TO) && inode)
audit_update_watch(parent, dname, inode->i_sb->s_dev, inode->i_ino, 0);
else if (mask & (FS_DELETE|FS_MOVED_FROM))
- audit_update_watch(parent, dname, (dev_t)-1, (unsigned long)-1, 1);
+ audit_update_watch(parent, dname, AUDIT_DEV_UNSET, AUDIT_INO_UNSET, 1);
else if (mask & (FS_DELETE_SELF|FS_UNMOUNT|FS_MOVE_SELF))
audit_remove_parent_watches(parent);
@@ -517,3 +518,36 @@ static int __init audit_watch_init(void)
return 0;
}
device_initcall(audit_watch_init);
+
+int audit_dupe_exe(struct audit_krule *new, struct audit_krule *old)
+{
+ struct audit_fsnotify_mark *audit_mark;
+ char *pathname;
+
+ pathname = kstrdup(audit_mark_path(old->exe), GFP_KERNEL);
+ if (!pathname)
+ return -ENOMEM;
+
+ audit_mark = audit_alloc_mark(new, pathname, strlen(pathname));
+ if (IS_ERR(audit_mark)) {
+ kfree(pathname);
+ return PTR_ERR(audit_mark);
+ }
+ new->exe = audit_mark;
+
+ return 0;
+}
+
+int audit_exe_compare(struct task_struct *tsk, struct audit_fsnotify_mark *mark)
+{
+ struct file *exe_file;
+ unsigned long ino;
+ dev_t dev;
+
+ rcu_read_lock();
+ exe_file = rcu_dereference(tsk->mm->exe_file);
+ ino = exe_file->f_inode->i_ino;
+ dev = exe_file->f_inode->i_sb->s_dev;
+ rcu_read_unlock();
+ return audit_mark_compare(mark, ino, dev);
+}
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 72e1660a79a3..7714d93edb85 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -405,6 +405,12 @@ static int audit_field_valid(struct audit_entry *entry, struct audit_field *f)
if (f->val > AUDIT_MAX_FIELD_COMPARE)
return -EINVAL;
break;
+ case AUDIT_EXE:
+ if (f->op != Audit_equal)
+ return -EINVAL;
+ if (entry->rule.listnr != AUDIT_FILTER_EXIT)
+ return -EINVAL;
+ break;
};
return 0;
}
@@ -419,6 +425,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
size_t remain = datasz - sizeof(struct audit_rule_data);
int i;
char *str;
+ struct audit_fsnotify_mark *audit_mark;
entry = audit_to_entry_common(data);
if (IS_ERR(entry))
@@ -539,6 +546,24 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
entry->rule.buflen += f->val;
entry->rule.filterkey = str;
break;
+ case AUDIT_EXE:
+ if (entry->rule.exe || f->val > PATH_MAX)
+ goto exit_free;
+ str = audit_unpack_string(&bufp, &remain, f->val);
+ if (IS_ERR(str)) {
+ err = PTR_ERR(str);
+ goto exit_free;
+ }
+ entry->rule.buflen += f->val;
+
+ audit_mark = audit_alloc_mark(&entry->rule, str, f->val);
+ if (IS_ERR(audit_mark)) {
+ kfree(str);
+ err = PTR_ERR(audit_mark);
+ goto exit_free;
+ }
+ entry->rule.exe = audit_mark;
+ break;
}
}
@@ -549,10 +574,10 @@ exit_nofree:
return entry;
exit_free:
- if (entry->rule.watch)
- audit_put_watch(entry->rule.watch); /* matches initial get */
if (entry->rule.tree)
audit_put_tree(entry->rule.tree); /* that's the temporary one */
+ if (entry->rule.exe)
+ audit_remove_mark(entry->rule.exe); /* that's the template one */
audit_free_rule(entry);
return ERR_PTR(err);
}
@@ -617,6 +642,10 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule)
data->buflen += data->values[i] =
audit_pack_string(&bufp, krule->filterkey);
break;
+ case AUDIT_EXE:
+ data->buflen += data->values[i] =
+ audit_pack_string(&bufp, audit_mark_path(krule->exe));
+ break;
case AUDIT_LOGINUID_SET:
if (krule->pflags & AUDIT_LOGINUID_LEGACY && !f->val) {
data->fields[i] = AUDIT_LOGINUID;
@@ -680,6 +709,12 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b)
if (strcmp(a->filterkey, b->filterkey))
return 1;
break;
+ case AUDIT_EXE:
+ /* both paths exist based on above type compare */
+ if (strcmp(audit_mark_path(a->exe),
+ audit_mark_path(b->exe)))
+ return 1;
+ break;
case AUDIT_UID:
case AUDIT_EUID:
case AUDIT_SUID:
@@ -801,8 +836,14 @@ struct audit_entry *audit_dupe_rule(struct audit_krule *old)
err = -ENOMEM;
else
new->filterkey = fk;
+ break;
+ case AUDIT_EXE:
+ err = audit_dupe_exe(new, old);
+ break;
}
if (err) {
+ if (new->exe)
+ audit_remove_mark(new->exe);
audit_free_rule(entry);
return ERR_PTR(err);
}
@@ -863,7 +904,7 @@ static inline int audit_add_rule(struct audit_entry *entry)
struct audit_watch *watch = entry->rule.watch;
struct audit_tree *tree = entry->rule.tree;
struct list_head *list;
- int err;
+ int err = 0;
#ifdef CONFIG_AUDITSYSCALL
int dont_count = 0;
@@ -881,7 +922,7 @@ static inline int audit_add_rule(struct audit_entry *entry)
/* normally audit_add_tree_rule() will free it on failure */
if (tree)
audit_put_tree(tree);
- goto error;
+ return err;
}
if (watch) {
@@ -895,14 +936,14 @@ static inline int audit_add_rule(struct audit_entry *entry)
*/
if (tree)
audit_put_tree(tree);
- goto error;
+ return err;
}
}
if (tree) {
err = audit_add_tree_rule(&entry->rule);
if (err) {
mutex_unlock(&audit_filter_mutex);
- goto error;
+ return err;
}
}
@@ -933,19 +974,13 @@ static inline int audit_add_rule(struct audit_entry *entry)
#endif
mutex_unlock(&audit_filter_mutex);
- return 0;
-
-error:
- if (watch)
- audit_put_watch(watch); /* tmp watch, matches initial get */
return err;
}
/* Remove an existing rule from filterlist. */
-static inline int audit_del_rule(struct audit_entry *entry)
+int audit_del_rule(struct audit_entry *entry)
{
struct audit_entry *e;
- struct audit_watch *watch = entry->rule.watch;
struct audit_tree *tree = entry->rule.tree;
struct list_head *list;
int ret = 0;
@@ -961,7 +996,6 @@ static inline int audit_del_rule(struct audit_entry *entry)
mutex_lock(&audit_filter_mutex);
e = audit_find_rule(entry, &list);
if (!e) {
- mutex_unlock(&audit_filter_mutex);
ret = -ENOENT;
goto out;
}
@@ -972,9 +1006,8 @@ static inline int audit_del_rule(struct audit_entry *entry)
if (e->rule.tree)
audit_remove_tree_rule(&e->rule);
- list_del_rcu(&e->list);
- list_del(&e->rule.list);
- call_rcu(&e->rcu, audit_free_rule_rcu);
+ if (e->rule.exe)
+ audit_remove_mark_rule(&e->rule);
#ifdef CONFIG_AUDITSYSCALL
if (!dont_count)
@@ -983,11 +1016,14 @@ static inline int audit_del_rule(struct audit_entry *entry)
if (!audit_match_signal(entry))
audit_signals--;
#endif
- mutex_unlock(&audit_filter_mutex);
+
+ list_del_rcu(&e->list);
+ list_del(&e->rule.list);
+ call_rcu(&e->rcu, audit_free_rule_rcu);
out:
- if (watch)
- audit_put_watch(watch); /* match initial get */
+ mutex_unlock(&audit_filter_mutex);
+
if (tree)
audit_put_tree(tree); /* that's the temporary one */
@@ -1077,8 +1113,11 @@ int audit_rule_change(int type, __u32 portid, int seq, void *data,
WARN_ON(1);
}
- if (err || type == AUDIT_DEL_RULE)
+ if (err || type == AUDIT_DEL_RULE) {
+ if (entry->rule.exe)
+ audit_remove_mark(entry->rule.exe);
audit_free_rule(entry);
+ }
return err;
}
@@ -1370,6 +1409,8 @@ static int update_lsm_rule(struct audit_krule *r)
return 0;
nentry = audit_dupe_rule(r);
+ if (entry->rule.exe)
+ audit_remove_mark(entry->rule.exe);
if (IS_ERR(nentry)) {
/* save the first error encountered for the
* return value */
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index e85bdfd15fed..b86cc04959de 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -180,7 +180,7 @@ static int audit_match_filetype(struct audit_context *ctx, int val)
return 0;
list_for_each_entry(n, &ctx->names_list, list) {
- if ((n->ino != -1) &&
+ if ((n->ino != AUDIT_INO_UNSET) &&
((n->mode & S_IFMT) == mode))
return 1;
}
@@ -466,6 +466,9 @@ static int audit_filter_rules(struct task_struct *tsk,
result = audit_comparator(ctx->ppid, f->op, f->val);
}
break;
+ case AUDIT_EXE:
+ result = audit_exe_compare(tsk, rule->exe);
+ break;
case AUDIT_UID:
result = audit_uid_comparator(cred->uid, f->op, f->uid);
break;
@@ -1680,7 +1683,7 @@ static struct audit_names *audit_alloc_name(struct audit_context *context,
aname->should_free = true;
}
- aname->ino = (unsigned long)-1;
+ aname->ino = AUDIT_INO_UNSET;
aname->type = type;
list_add_tail(&aname->list, &context->names_list);
@@ -1922,7 +1925,7 @@ void __audit_inode_child(const struct inode *parent,
if (inode)
audit_copy_inode(found_child, dentry, inode);
else
- found_child->ino = (unsigned long)-1;
+ found_child->ino = AUDIT_INO_UNSET;
}
EXPORT_SYMBOL_GPL(__audit_inode_child);
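
With the AUDIT_EXE field wired into audit_filter_rules(), userspace can request per-executable filtering. A hedged userspace sketch of loading such a rule via libaudit follows, roughly equivalent to "auditctl -a always,exit -F exe=/usr/bin/id"; the libaudit calls and the "exe=" field-pair parsing are assumed to exist in a matching userspace release and are not part of this kernel patch:

/* Hedged sketch: add an exit-filter rule keyed on the task's exe. */
#include <stdlib.h>
#include <libaudit.h>

int main(void)
{
        struct audit_rule_data *rule = calloc(1, sizeof(*rule));
        int fd = audit_open();

        if (fd < 0 || !rule)
                return 1;
        if (audit_rule_syscallbyname_data(rule, "all") < 0)
                return 1;
        if (audit_rule_fieldpair_data(&rule, "exe=/usr/bin/id",
                                      AUDIT_FILTER_EXIT) < 0)
                return 1;
        if (audit_add_rule_data(fd, rule, AUDIT_FILTER_EXIT,
                                AUDIT_ALWAYS) < 0)
                return 1;
        audit_close(fd);
        free(rule);
        return 0;
}
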
diff --git a/kernel/memremap.c b/kernel/memremap.c
new file mode 100644
index 000000000000..72b0c66628b6
--- /dev/null
+++ b/kernel/memremap.c
@@ -0,0 +1,190 @@
+/*
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <linux/device.h>
+#include <linux/types.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/memory_hotplug.h>
+
+#ifndef ioremap_cache
+/* temporary while we convert existing ioremap_cache users to memremap */
+__weak void __iomem *ioremap_cache(resource_size_t offset, unsigned long size)
+{
+ return ioremap(offset, size);
+}
+#endif
+
+/**
+ * memremap() - remap an iomem_resource as cacheable memory
+ * @offset: iomem resource start address
+ * @size: size of remap
+ * @flags: either MEMREMAP_WB or MEMREMAP_WT
+ *
+ * memremap() is "ioremap" for cases where it is known that the resource
+ * being mapped does not have i/o side effects and the __iomem
+ * annotation is not applicable.
+ *
+ * MEMREMAP_WB - matches the default mapping for "System RAM" on
+ * the architecture. This is usually a read-allocate write-back cache.
+ * Moreover, if MEMREMAP_WB is specified and the requested remap region is
+ * RAM, memremap() will bypass establishing a new mapping and instead return
+ * a pointer into the direct map.
+ *
+ * MEMREMAP_WT - establish a mapping whereby writes either bypass the
+ * cache or are written through to memory and never exist in a
+ * cache-dirty state with respect to program visibility. Attempts to
+ * map "System RAM" with this mapping type will fail.
+ */
+void *memremap(resource_size_t offset, size_t size, unsigned long flags)
+{
+ int is_ram = region_intersects(offset, size, "System RAM");
+ void *addr = NULL;
+
+ if (is_ram == REGION_MIXED) {
+ WARN_ONCE(1, "memremap attempted on mixed range %pa size: %#lx\n",
+ &offset, (unsigned long) size);
+ return NULL;
+ }
+
+ /* Try all mapping types requested until one returns non-NULL */
+ if (flags & MEMREMAP_WB) {
+ flags &= ~MEMREMAP_WB;
+ /*
+	 * MEMREMAP_WB is special in that it can be satisfied
+ * from the direct map. Some archs depend on the
+ * capability of memremap() to autodetect cases where
+ * the requested range is potentially in "System RAM"
+ */
+ if (is_ram == REGION_INTERSECTS)
+ addr = __va(offset);
+ else
+ addr = ioremap_cache(offset, size);
+ }
+
+ /*
+ * If we don't have a mapping yet and more request flags are
+ * pending then we will be attempting to establish a new virtual
+ * address mapping. Enforce that this mapping is not aliasing
+ * "System RAM"
+ */
+ if (!addr && is_ram == REGION_INTERSECTS && flags) {
+ WARN_ONCE(1, "memremap attempted on ram %pa size: %#lx\n",
+ &offset, (unsigned long) size);
+ return NULL;
+ }
+
+ if (!addr && (flags & MEMREMAP_WT)) {
+ flags &= ~MEMREMAP_WT;
+ addr = ioremap_wt(offset, size);
+ }
+
+ return addr;
+}
+EXPORT_SYMBOL(memremap);
+
+void memunmap(void *addr)
+{
+ if (is_vmalloc_addr(addr))
+ iounmap((void __iomem *) addr);
+}
+EXPORT_SYMBOL(memunmap);
+
+static void devm_memremap_release(struct device *dev, void *res)
+{
+ memunmap(res);
+}
+
+static int devm_memremap_match(struct device *dev, void *res, void *match_data)
+{
+ return *(void **)res == match_data;
+}
+
+void *devm_memremap(struct device *dev, resource_size_t offset,
+ size_t size, unsigned long flags)
+{
+ void **ptr, *addr;
+
+ ptr = devres_alloc(devm_memremap_release, sizeof(*ptr), GFP_KERNEL);
+ if (!ptr)
+ return NULL;
+
+ addr = memremap(offset, size, flags);
+ if (addr) {
+ *ptr = addr;
+ devres_add(dev, ptr);
+ } else
+ devres_free(ptr);
+
+ return addr;
+}
+EXPORT_SYMBOL(devm_memremap);
+
+void devm_memunmap(struct device *dev, void *addr)
+{
+ WARN_ON(devres_destroy(dev, devm_memremap_release, devm_memremap_match,
+ addr));
+ memunmap(addr);
+}
+EXPORT_SYMBOL(devm_memunmap);
+
+#ifdef CONFIG_ZONE_DEVICE
+struct page_map {
+ struct resource res;
+};
+
+static void devm_memremap_pages_release(struct device *dev, void *res)
+{
+ struct page_map *page_map = res;
+
+ /* pages are dead and unused, undo the arch mapping */
+ arch_remove_memory(page_map->res.start, resource_size(&page_map->res));
+}
+
+void *devm_memremap_pages(struct device *dev, struct resource *res)
+{
+ int is_ram = region_intersects(res->start, resource_size(res),
+ "System RAM");
+ struct page_map *page_map;
+ int error, nid;
+
+ if (is_ram == REGION_MIXED) {
+ WARN_ONCE(1, "%s attempted on mixed region %pr\n",
+ __func__, res);
+ return ERR_PTR(-ENXIO);
+ }
+
+ if (is_ram == REGION_INTERSECTS)
+ return __va(res->start);
+
+ page_map = devres_alloc(devm_memremap_pages_release,
+ sizeof(*page_map), GFP_KERNEL);
+ if (!page_map)
+ return ERR_PTR(-ENOMEM);
+
+ memcpy(&page_map->res, res, sizeof(*res));
+
+ nid = dev_to_node(dev);
+ if (nid < 0)
+ nid = 0;
+
+ error = arch_add_memory(nid, res->start, resource_size(res), true);
+ if (error) {
+ devres_free(page_map);
+ return ERR_PTR(error);
+ }
+
+ devres_add(dev, page_map);
+ return __va(res->start);
+}
+EXPORT_SYMBOL(devm_memremap_pages);
+#endif /* CONFIG_ZONE_DEVICE */
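
A typical consumer of the new interface is a driver that needs a cacheable mapping of a range known to have no I/O side effects. A minimal, hypothetical probe sketch follows (the device, resource, and foo_probe() name are invented for illustration; only the devm_memremap() call comes from this patch):

#include <linux/platform_device.h>
#include <linux/io.h>
#include <linux/string.h>

/* Hedged sketch of devm_memremap() usage in a driver probe path. */
static int foo_probe(struct platform_device *pdev)
{
        struct resource *res;
        void *base;

        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        if (!res)
                return -ENODEV;

        /* MEMREMAP_WB: reuses the direct map if the range is System RAM */
        base = devm_memremap(&pdev->dev, res->start, resource_size(res),
                             MEMREMAP_WB);
        if (!base)
                return -ENOMEM;

        /* plain loads and stores are valid here; no __iomem accessors */
        memset(base, 0, resource_size(res));
        return 0;
}
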
diff --git a/kernel/module_signing.c b/kernel/module_signing.c
index be5b8fac4bd0..bd62f5cda746 100644
--- a/kernel/module_signing.c
+++ b/kernel/module_signing.c
@@ -10,11 +10,8 @@
*/
#include <linux/kernel.h>
-#include <linux/err.h>
-#include <crypto/public_key.h>
-#include <crypto/hash.h>
-#include <keys/asymmetric-type.h>
#include <keys/system_keyring.h>
+#include <crypto/public_key.h>
#include "module-internal.h"
/*
@@ -28,170 +25,22 @@
* - Information block
*/
struct module_signature {
- u8 algo; /* Public-key crypto algorithm [enum pkey_algo] */
- u8 hash; /* Digest algorithm [enum hash_algo] */
- u8 id_type; /* Key identifier type [enum pkey_id_type] */
- u8 signer_len; /* Length of signer's name */
- u8 key_id_len; /* Length of key identifier */
+ u8 algo; /* Public-key crypto algorithm [0] */
+ u8 hash; /* Digest algorithm [0] */
+ u8 id_type; /* Key identifier type [PKEY_ID_PKCS7] */
+ u8 signer_len; /* Length of signer's name [0] */
+ u8 key_id_len; /* Length of key identifier [0] */
u8 __pad[3];
__be32 sig_len; /* Length of signature data */
};
/*
- * Digest the module contents.
- */
-static struct public_key_signature *mod_make_digest(enum hash_algo hash,
- const void *mod,
- unsigned long modlen)
-{
- struct public_key_signature *pks;
- struct crypto_shash *tfm;
- struct shash_desc *desc;
- size_t digest_size, desc_size;
- int ret;
-
- pr_devel("==>%s()\n", __func__);
-
- /* Allocate the hashing algorithm we're going to need and find out how
- * big the hash operational data will be.
- */
- tfm = crypto_alloc_shash(hash_algo_name[hash], 0, 0);
- if (IS_ERR(tfm))
- return (PTR_ERR(tfm) == -ENOENT) ? ERR_PTR(-ENOPKG) : ERR_CAST(tfm);
-
- desc_size = crypto_shash_descsize(tfm) + sizeof(*desc);
- digest_size = crypto_shash_digestsize(tfm);
-
- /* We allocate the hash operational data storage on the end of our
- * context data and the digest output buffer on the end of that.
- */
- ret = -ENOMEM;
- pks = kzalloc(digest_size + sizeof(*pks) + desc_size, GFP_KERNEL);
- if (!pks)
- goto error_no_pks;
-
- pks->pkey_hash_algo = hash;
- pks->digest = (u8 *)pks + sizeof(*pks) + desc_size;
- pks->digest_size = digest_size;
-
- desc = (void *)pks + sizeof(*pks);
- desc->tfm = tfm;
- desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
-
- ret = crypto_shash_init(desc);
- if (ret < 0)
- goto error;
-
- ret = crypto_shash_finup(desc, mod, modlen, pks->digest);
- if (ret < 0)
- goto error;
-
- crypto_free_shash(tfm);
- pr_devel("<==%s() = ok\n", __func__);
- return pks;
-
-error:
- kfree(pks);
-error_no_pks:
- crypto_free_shash(tfm);
- pr_devel("<==%s() = %d\n", __func__, ret);
- return ERR_PTR(ret);
-}
-
-/*
- * Extract an MPI array from the signature data. This represents the actual
- * signature. Each raw MPI is prefaced by a BE 2-byte value indicating the
- * size of the MPI in bytes.
- *
- * RSA signatures only have one MPI, so currently we only read one.
- */
-static int mod_extract_mpi_array(struct public_key_signature *pks,
- const void *data, size_t len)
-{
- size_t nbytes;
- MPI mpi;
-
- if (len < 3)
- return -EBADMSG;
- nbytes = ((const u8 *)data)[0] << 8 | ((const u8 *)data)[1];
- data += 2;
- len -= 2;
- if (len != nbytes)
- return -EBADMSG;
-
- mpi = mpi_read_raw_data(data, nbytes);
- if (!mpi)
- return -ENOMEM;
- pks->mpi[0] = mpi;
- pks->nr_mpi = 1;
- return 0;
-}
-
-/*
- * Request an asymmetric key.
- */
-static struct key *request_asymmetric_key(const char *signer, size_t signer_len,
- const u8 *key_id, size_t key_id_len)
-{
- key_ref_t key;
- size_t i;
- char *id, *q;
-
- pr_devel("==>%s(,%zu,,%zu)\n", __func__, signer_len, key_id_len);
-
- /* Construct an identifier. */
- id = kmalloc(signer_len + 2 + key_id_len * 2 + 1, GFP_KERNEL);
- if (!id)
- return ERR_PTR(-ENOKEY);
-
- memcpy(id, signer, signer_len);
-
- q = id + signer_len;
- *q++ = ':';
- *q++ = ' ';
- for (i = 0; i < key_id_len; i++) {
- *q++ = hex_asc[*key_id >> 4];
- *q++ = hex_asc[*key_id++ & 0x0f];
- }
-
- *q = 0;
-
- pr_debug("Look up: \"%s\"\n", id);
-
- key = keyring_search(make_key_ref(system_trusted_keyring, 1),
- &key_type_asymmetric, id);
- if (IS_ERR(key))
- pr_warn("Request for unknown module key '%s' err %ld\n",
- id, PTR_ERR(key));
- kfree(id);
-
- if (IS_ERR(key)) {
- switch (PTR_ERR(key)) {
- /* Hide some search errors */
- case -EACCES:
- case -ENOTDIR:
- case -EAGAIN:
- return ERR_PTR(-ENOKEY);
- default:
- return ERR_CAST(key);
- }
- }
-
- pr_devel("<==%s() = 0 [%x]\n", __func__, key_serial(key_ref_to_ptr(key)));
- return key_ref_to_ptr(key);
-}
-
-/*
* Verify the signature on a module.
*/
int mod_verify_sig(const void *mod, unsigned long *_modlen)
{
- struct public_key_signature *pks;
struct module_signature ms;
- struct key *key;
- const void *sig;
size_t modlen = *_modlen, sig_len;
- int ret;
pr_devel("==>%s(,%zu)\n", __func__, modlen);
@@ -205,46 +54,24 @@ int mod_verify_sig(const void *mod, unsigned long *_modlen)
if (sig_len >= modlen)
return -EBADMSG;
modlen -= sig_len;
- if ((size_t)ms.signer_len + ms.key_id_len >= modlen)
- return -EBADMSG;
- modlen -= (size_t)ms.signer_len + ms.key_id_len;
-
*_modlen = modlen;
- sig = mod + modlen;
-
- /* For the moment, only support RSA and X.509 identifiers */
- if (ms.algo != PKEY_ALGO_RSA ||
- ms.id_type != PKEY_ID_X509)
- return -ENOPKG;
- if (ms.hash >= PKEY_HASH__LAST ||
- !hash_algo_name[ms.hash])
+ if (ms.id_type != PKEY_ID_PKCS7) {
+ pr_err("Module is not signed with expected PKCS#7 message\n");
return -ENOPKG;
-
- key = request_asymmetric_key(sig, ms.signer_len,
- sig + ms.signer_len, ms.key_id_len);
- if (IS_ERR(key))
- return PTR_ERR(key);
-
- pks = mod_make_digest(ms.hash, mod, modlen);
- if (IS_ERR(pks)) {
- ret = PTR_ERR(pks);
- goto error_put_key;
}
- ret = mod_extract_mpi_array(pks, sig + ms.signer_len + ms.key_id_len,
- sig_len);
- if (ret < 0)
- goto error_free_pks;
-
- ret = verify_signature(key, pks);
- pr_devel("verify_signature() = %d\n", ret);
+ if (ms.algo != 0 ||
+ ms.hash != 0 ||
+ ms.signer_len != 0 ||
+ ms.key_id_len != 0 ||
+ ms.__pad[0] != 0 ||
+ ms.__pad[1] != 0 ||
+ ms.__pad[2] != 0) {
+ pr_err("PKCS#7 signature info has unexpected non-zero params\n");
+ return -EBADMSG;
+ }
-error_free_pks:
- mpi_free(pks->rsa.s);
- kfree(pks);
-error_put_key:
- key_put(key);
- pr_devel("<==%s() = %d\n", __func__, ret);
- return ret;
+ return system_verify_data(mod, modlen, mod + modlen, sig_len,
+ VERIFYING_MODULE_SIGNATURE);
}
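
The trailer that mod_verify_sig() now expects can also be inspected from userspace. A hedged, standalone sketch that walks a signed module image from the end (the "~Module signature appended~\n" marker is the kernel's MODULE_SIG_STRING; find_signature() is an invented helper):

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>          /* ntohl() for the big-endian sig_len */

struct module_signature {
        uint8_t  algo, hash, id_type, signer_len, key_id_len, pad[3];
        uint32_t sig_len;       /* length of the PKCS#7 blob, big-endian */
};

#define SIG_MAGIC "~Module signature appended~\n"

/* buf/len: a whole signed .ko image already read into memory */
static int find_signature(const uint8_t *buf, size_t len)
{
        const size_t magic = sizeof(SIG_MAGIC) - 1;
        const struct module_signature *ms;
        uint32_t sig_len;

        if (len < magic + sizeof(*ms) ||
            memcmp(buf + len - magic, SIG_MAGIC, magic))
                return -1;      /* not signed */

        ms = (const void *)(buf + len - magic - sizeof(*ms));
        sig_len = ntohl(ms->sig_len);
        if (sig_len >= len - magic - sizeof(*ms))
                return -1;      /* corrupt trailer */

        /* id_type 2 == PKEY_ID_PKCS7, the only type accepted above */
        printf("id_type=%u sig_len=%u\n",
               (unsigned)ms->id_type, (unsigned)sig_len);
        return 0;
}
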
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index c8e0e050a36a..787320de68e0 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -556,6 +556,19 @@ static int ptrace_setoptions(struct task_struct *child, unsigned long data)
if (data & ~(unsigned long)PTRACE_O_MASK)
return -EINVAL;
+ if (unlikely(data & PTRACE_O_SUSPEND_SECCOMP)) {
+ if (!config_enabled(CONFIG_CHECKPOINT_RESTORE) ||
+ !config_enabled(CONFIG_SECCOMP))
+ return -EINVAL;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (seccomp_mode(&current->seccomp) != SECCOMP_MODE_DISABLED ||
+ current->ptrace & PT_SUSPEND_SECCOMP)
+ return -EPERM;
+ }
+
/* Avoid intermediate state when all opts are cleared */
flags = child->ptrace;
flags &= ~(PTRACE_O_MASK << PT_OPT_FLAG_SHIFT);
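
From userspace, a checkpoint/restore tool flips the new option roughly as follows (hedged sketch; suspend_seccomp() is an invented helper, the tracer needs CAP_SYS_ADMIN on a kernel built with CONFIG_CHECKPOINT_RESTORE and CONFIG_SECCOMP, and must not itself be running under seccomp):

#include <stdio.h>
#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/wait.h>

#ifndef PTRACE_O_SUSPEND_SECCOMP
#define PTRACE_O_SUSPEND_SECCOMP (1 << 21)      /* new in this series */
#endif

/* Hedged sketch: stop seccomp enforcement in an attached tracee. */
static int suspend_seccomp(pid_t pid)
{
        if (ptrace(PTRACE_ATTACH, pid, NULL, NULL))
                return -1;
        if (waitpid(pid, NULL, 0) != pid)
                return -1;
        if (ptrace(PTRACE_SETOPTIONS, pid, NULL,
                   (void *)PTRACE_O_SUSPEND_SECCOMP)) {
                perror("PTRACE_SETOPTIONS");    /* EPERM without CAP_SYS_ADMIN */
                return -1;
        }
        return 0;
}
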
diff --git a/kernel/resource.c b/kernel/resource.c
index fed052a1bc9f..f150dbbe6f62 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -492,40 +492,51 @@ int __weak page_is_ram(unsigned long pfn)
}
EXPORT_SYMBOL_GPL(page_is_ram);
-/*
- * Search for a resouce entry that fully contains the specified region.
- * If found, return 1 if it is RAM, 0 if not.
- * If not found, or region is not fully contained, return -1
+/**
+ * region_intersects() - determine intersection of region with known resources
+ * @start: region start address
+ * @size: size of region
+ * @name: name of resource (in iomem_resource)
*
- * Used by the ioremap functions to ensure the user is not remapping RAM and is
- * a vast speed up over walking through the resource table page by page.
+ * Check if the specified region partially overlaps or fully eclipses a
+ * resource identified by @name. Return REGION_DISJOINT if the region
+ * does not overlap @name, return REGION_MIXED if the region overlaps
+ * @name and another resource, and return REGION_INTERSECTS if the
+ * region overlaps @name and no other defined resource. Note that
+ * REGION_INTERSECTS is also returned in the case when the specified
+ * region overlaps RAM and undefined memory holes.
+ *
+ * region_intersects() is used by memory remapping functions to ensure
+ * the user is not remapping RAM and is a vast speed up over walking
+ * through the resource table page by page.
*/
-int region_is_ram(resource_size_t start, unsigned long size)
+int region_intersects(resource_size_t start, size_t size, const char *name)
{
- struct resource *p;
- resource_size_t end = start + size - 1;
unsigned long flags = IORESOURCE_MEM | IORESOURCE_BUSY;
- const char *name = "System RAM";
- int ret = -1;
+ resource_size_t end = start + size - 1;
+ int type = 0; int other = 0;
+ struct resource *p;
read_lock(&resource_lock);
for (p = iomem_resource.child; p ; p = p->sibling) {
- if (p->end < start)
- continue;
-
- if (p->start <= start && end <= p->end) {
- /* resource fully contains region */
- if ((p->flags != flags) || strcmp(p->name, name))
- ret = 0;
- else
- ret = 1;
- break;
- }
- if (end < p->start)
- break; /* not found */
+ bool is_type = strcmp(p->name, name) == 0 && p->flags == flags;
+
+ if (start >= p->start && start <= p->end)
+ is_type ? type++ : other++;
+ if (end >= p->start && end <= p->end)
+ is_type ? type++ : other++;
+ if (p->start >= start && p->end <= end)
+ is_type ? type++ : other++;
}
read_unlock(&resource_lock);
- return ret;
+
+ if (other == 0)
+ return type ? REGION_INTERSECTS : REGION_DISJOINT;
+
+ if (type)
+ return REGION_MIXED;
+
+ return REGION_DISJOINT;
}
void __weak arch_remove_reservations(struct resource *avail)
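
The tri-state result is what lets memremap() above choose between the direct map, a new mapping, or refusal. A condensed sketch of the expected calling pattern (map_checked() is an invented helper mirroring the memremap() hunk earlier in this patch; kernel context assumed):

/* Hedged sketch: classify a candidate range before mapping it. */
static void *map_checked(resource_size_t offset, size_t size)
{
        int ret = region_intersects(offset, size, "System RAM");

        if (ret == REGION_MIXED)        /* straddles RAM and non-RAM */
                return NULL;
        if (ret == REGION_INTERSECTS)   /* wholly RAM (or RAM plus holes) */
                return __va(offset);
        /* REGION_DISJOINT: no RAM anywhere in the range */
        return (void __force *)ioremap_cache(offset, size);
}
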
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 245df6b32b81..5bd4779282df 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -175,17 +175,16 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
*/
static u32 seccomp_run_filters(struct seccomp_data *sd)
{
- struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter);
struct seccomp_data sd_local;
u32 ret = SECCOMP_RET_ALLOW;
+ /* Make sure cross-thread synced filter points somewhere sane. */
+ struct seccomp_filter *f =
+ lockless_dereference(current->seccomp.filter);
/* Ensure unexpected behavior doesn't result in failing open. */
if (unlikely(WARN_ON(f == NULL)))
return SECCOMP_RET_KILL;
- /* Make sure cross-thread synced filter points somewhere sane. */
- smp_read_barrier_depends();
-
if (!sd) {
populate_seccomp_data(&sd_local);
sd = &sd_local;
@@ -549,7 +548,11 @@ void secure_computing_strict(int this_syscall)
{
int mode = current->seccomp.mode;
- if (mode == 0)
+ if (config_enabled(CONFIG_CHECKPOINT_RESTORE) &&
+ unlikely(current->ptrace & PT_SUSPEND_SECCOMP))
+ return;
+
+ if (mode == SECCOMP_MODE_DISABLED)
return;
else if (mode == SECCOMP_MODE_STRICT)
__secure_computing_strict(this_syscall);
@@ -650,6 +653,10 @@ u32 seccomp_phase1(struct seccomp_data *sd)
int this_syscall = sd ? sd->nr :
syscall_get_nr(current, task_pt_regs(current));
+ if (config_enabled(CONFIG_CHECKPOINT_RESTORE) &&
+ unlikely(current->ptrace & PT_SUSPEND_SECCOMP))
+ return SECCOMP_PHASE1_OK;
+
switch (mode) {
case SECCOMP_MODE_STRICT:
__secure_computing_strict(this_syscall); /* may call do_exit */
diff --git a/kernel/system_certificates.S b/kernel/system_certificates.S
deleted file mode 100644
index 3e9868d47535..000000000000
--- a/kernel/system_certificates.S
+++ /dev/null
@@ -1,20 +0,0 @@
-#include <linux/export.h>
-#include <linux/init.h>
-
- __INITRODATA
-
- .align 8
- .globl VMLINUX_SYMBOL(system_certificate_list)
-VMLINUX_SYMBOL(system_certificate_list):
-__cert_list_start:
- .incbin "kernel/x509_certificate_list"
-__cert_list_end:
-
- .align 8
- .globl VMLINUX_SYMBOL(system_certificate_list_size)
-VMLINUX_SYMBOL(system_certificate_list_size):
-#ifdef CONFIG_64BIT
- .quad __cert_list_end - __cert_list_start
-#else
- .long __cert_list_end - __cert_list_start
-#endif
diff --git a/kernel/system_keyring.c b/kernel/system_keyring.c
deleted file mode 100644
index 875f64e8935b..000000000000
--- a/kernel/system_keyring.c
+++ /dev/null
@@ -1,106 +0,0 @@
-/* System trusted keyring for trusted public keys
- *
- * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-
-#include <linux/export.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/cred.h>
-#include <linux/err.h>
-#include <keys/asymmetric-type.h>
-#include <keys/system_keyring.h>
-#include "module-internal.h"
-
-struct key *system_trusted_keyring;
-EXPORT_SYMBOL_GPL(system_trusted_keyring);
-
-extern __initconst const u8 system_certificate_list[];
-extern __initconst const unsigned long system_certificate_list_size;
-
-/*
- * Load the compiled-in keys
- */
-static __init int system_trusted_keyring_init(void)
-{
- pr_notice("Initialise system trusted keyring\n");
-
- system_trusted_keyring =
- keyring_alloc(".system_keyring",
- KUIDT_INIT(0), KGIDT_INIT(0), current_cred(),
- ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
- KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH),
- KEY_ALLOC_NOT_IN_QUOTA, NULL);
- if (IS_ERR(system_trusted_keyring))
- panic("Can't allocate system trusted keyring\n");
-
- set_bit(KEY_FLAG_TRUSTED_ONLY, &system_trusted_keyring->flags);
- return 0;
-}
-
-/*
- * Must be initialised before we try and load the keys into the keyring.
- */
-device_initcall(system_trusted_keyring_init);
-
-/*
- * Load the compiled-in list of X.509 certificates.
- */
-static __init int load_system_certificate_list(void)
-{
- key_ref_t key;
- const u8 *p, *end;
- size_t plen;
-
- pr_notice("Loading compiled-in X.509 certificates\n");
-
- p = system_certificate_list;
- end = p + system_certificate_list_size;
- while (p < end) {
- /* Each cert begins with an ASN.1 SEQUENCE tag and must be more
- * than 256 bytes in size.
- */
- if (end - p < 4)
- goto dodgy_cert;
- if (p[0] != 0x30 &&
- p[1] != 0x82)
- goto dodgy_cert;
- plen = (p[2] << 8) | p[3];
- plen += 4;
- if (plen > end - p)
- goto dodgy_cert;
-
- key = key_create_or_update(make_key_ref(system_trusted_keyring, 1),
- "asymmetric",
- NULL,
- p,
- plen,
- ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
- KEY_USR_VIEW | KEY_USR_READ),
- KEY_ALLOC_NOT_IN_QUOTA |
- KEY_ALLOC_TRUSTED);
- if (IS_ERR(key)) {
- pr_err("Problem loading in-kernel X.509 certificate (%ld)\n",
- PTR_ERR(key));
- } else {
- set_bit(KEY_FLAG_BUILTIN, &key_ref_to_ptr(key)->flags);
- pr_notice("Loaded X.509 cert '%s'\n",
- key_ref_to_ptr(key)->description);
- key_ref_put(key);
- }
- p += plen;
- }
-
- return 0;
-
-dodgy_cert:
- pr_err("Problem parsing in-kernel X.509 certificate list\n");
- return 0;
-}
-late_initcall(load_system_certificate_list);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index eb11011b5292..b0623ac785a2 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -630,13 +630,18 @@ static int function_stat_show(struct seq_file *m, void *v)
goto out;
}
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ avg = rec->time;
+ do_div(avg, rec->counter);
+ if (tracing_thresh && (avg < tracing_thresh))
+ goto out;
+#endif
+
kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
seq_printf(m, " %-30.30s %10lu", str, rec->counter);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
seq_puts(m, " ");
- avg = rec->time;
- do_div(avg, rec->counter);
/* Sample standard deviation (s^2) */
if (rec->counter <= 1)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 6260717c18e3..fc347f8b1bca 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -400,6 +400,17 @@ struct rb_irq_work {
};
/*
+ * Structure to hold event state and handle nested events.
+ */
+struct rb_event_info {
+ u64 ts;
+ u64 delta;
+ unsigned long length;
+ struct buffer_page *tail_page;
+ int add_timestamp;
+};
+
+/*
* Used for which event context the event is in.
* NMI = 0
* IRQ = 1
@@ -1876,73 +1887,6 @@ rb_event_index(struct ring_buffer_event *event)
return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE;
}
-static inline int
-rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
- struct ring_buffer_event *event)
-{
- unsigned long addr = (unsigned long)event;
- unsigned long index;
-
- index = rb_event_index(event);
- addr &= PAGE_MASK;
-
- return cpu_buffer->commit_page->page == (void *)addr &&
- rb_commit_index(cpu_buffer) == index;
-}
-
-static void
-rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
-{
- unsigned long max_count;
-
- /*
- * We only race with interrupts and NMIs on this CPU.
- * If we own the commit event, then we can commit
- * all others that interrupted us, since the interruptions
- * are in stack format (they finish before they come
- * back to us). This allows us to do a simple loop to
- * assign the commit to the tail.
- */
- again:
- max_count = cpu_buffer->nr_pages * 100;
-
- while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
- if (RB_WARN_ON(cpu_buffer, !(--max_count)))
- return;
- if (RB_WARN_ON(cpu_buffer,
- rb_is_reader_page(cpu_buffer->tail_page)))
- return;
- local_set(&cpu_buffer->commit_page->page->commit,
- rb_page_write(cpu_buffer->commit_page));
- rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
- cpu_buffer->write_stamp =
- cpu_buffer->commit_page->page->time_stamp;
- /* add barrier to keep gcc from optimizing too much */
- barrier();
- }
- while (rb_commit_index(cpu_buffer) !=
- rb_page_write(cpu_buffer->commit_page)) {
-
- local_set(&cpu_buffer->commit_page->page->commit,
- rb_page_write(cpu_buffer->commit_page));
- RB_WARN_ON(cpu_buffer,
- local_read(&cpu_buffer->commit_page->page->commit) &
- ~RB_WRITE_MASK);
- barrier();
- }
-
- /* again, keep gcc from optimizing */
- barrier();
-
- /*
- * If an interrupt came in just after the first while loop
- * and pushed the tail page forward, we will be left with
- * a dangling commit that will never go forward.
- */
- if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page))
- goto again;
-}
-
static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
{
cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp;
@@ -1968,64 +1912,6 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
iter->head = 0;
}
-/* Slow path, do not inline */
-static noinline struct ring_buffer_event *
-rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
-{
- event->type_len = RINGBUF_TYPE_TIME_EXTEND;
-
- /* Not the first event on the page? */
- if (rb_event_index(event)) {
- event->time_delta = delta & TS_MASK;
- event->array[0] = delta >> TS_SHIFT;
- } else {
- /* nope, just zero it */
- event->time_delta = 0;
- event->array[0] = 0;
- }
-
- return skip_time_extend(event);
-}
-
-/**
- * rb_update_event - update event type and data
- * @event: the event to update
- * @type: the type of event
- * @length: the size of the event field in the ring buffer
- *
- * Update the type and data fields of the event. The length
- * is the actual size that is written to the ring buffer,
- * and with this, we can determine what to place into the
- * data field.
- */
-static void
-rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
- struct ring_buffer_event *event, unsigned length,
- int add_timestamp, u64 delta)
-{
- /* Only a commit updates the timestamp */
- if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
- delta = 0;
-
- /*
- * If we need to add a timestamp, then we
- * add it to the start of the resevered space.
- */
- if (unlikely(add_timestamp)) {
- event = rb_add_time_stamp(event, delta);
- length -= RB_LEN_TIME_EXTEND;
- delta = 0;
- }
-
- event->time_delta = delta;
- length -= RB_EVNT_HDR_SIZE;
- if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
- event->type_len = 0;
- event->array[0] = length;
- } else
- event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
-}
-
/*
* rb_handle_head_page - writer hit the head page
*
@@ -2184,29 +2070,13 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
return 0;
}
-static unsigned rb_calculate_event_length(unsigned length)
-{
- struct ring_buffer_event event; /* Used only for sizeof array */
-
- /* zero length can cause confusions */
- if (!length)
- length++;
-
- if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
- length += sizeof(event.array[0]);
-
- length += RB_EVNT_HDR_SIZE;
- length = ALIGN(length, RB_ARCH_ALIGNMENT);
-
- return length;
-}
-
static inline void
rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
- struct buffer_page *tail_page,
- unsigned long tail, unsigned long length)
+ unsigned long tail, struct rb_event_info *info)
{
+ struct buffer_page *tail_page = info->tail_page;
struct ring_buffer_event *event;
+ unsigned long length = info->length;
/*
* Only the event that crossed the page boundary
@@ -2276,13 +2146,14 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
*/
static noinline struct ring_buffer_event *
rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
- unsigned long length, unsigned long tail,
- struct buffer_page *tail_page, u64 ts)
+ unsigned long tail, struct rb_event_info *info)
{
+ struct buffer_page *tail_page = info->tail_page;
struct buffer_page *commit_page = cpu_buffer->commit_page;
struct ring_buffer *buffer = cpu_buffer->buffer;
struct buffer_page *next_page;
int ret;
+ u64 ts;
next_page = tail_page;
@@ -2368,74 +2239,120 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
out_again:
- rb_reset_tail(cpu_buffer, tail_page, tail, length);
+ rb_reset_tail(cpu_buffer, tail, info);
/* fail and let the caller try again */
return ERR_PTR(-EAGAIN);
out_reset:
/* reset write */
- rb_reset_tail(cpu_buffer, tail_page, tail, length);
+ rb_reset_tail(cpu_buffer, tail, info);
return NULL;
}
-static struct ring_buffer_event *
-__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
- unsigned long length, u64 ts,
- u64 delta, int add_timestamp)
+/* Slow path, do not inline */
+static noinline struct ring_buffer_event *
+rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
{
- struct buffer_page *tail_page;
- struct ring_buffer_event *event;
- unsigned long tail, write;
+ event->type_len = RINGBUF_TYPE_TIME_EXTEND;
- /*
- * If the time delta since the last event is too big to
- * hold in the time field of the event, then we append a
- * TIME EXTEND event ahead of the data event.
- */
- if (unlikely(add_timestamp))
- length += RB_LEN_TIME_EXTEND;
+ /* Not the first event on the page? */
+ if (rb_event_index(event)) {
+ event->time_delta = delta & TS_MASK;
+ event->array[0] = delta >> TS_SHIFT;
+ } else {
+ /* nope, just zero it */
+ event->time_delta = 0;
+ event->array[0] = 0;
+ }
- tail_page = cpu_buffer->tail_page;
- write = local_add_return(length, &tail_page->write);
+ return skip_time_extend(event);
+}
- /* set write to only the index of the write */
- write &= RB_WRITE_MASK;
- tail = write - length;
+static inline int rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
+ struct ring_buffer_event *event);
+
+/**
+ * rb_update_event - update event type and data
+ * @event: the event to update
+ * @type: the type of event
+ * @length: the size of the event field in the ring buffer
+ *
+ * Update the type and data fields of the event. The length
+ * is the actual size that is written to the ring buffer,
+ * and with this, we can determine what to place into the
+ * data field.
+ */
+static void
+rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
+ struct ring_buffer_event *event,
+ struct rb_event_info *info)
+{
+ unsigned length = info->length;
+ u64 delta = info->delta;
+
+ /* Only a commit updates the timestamp */
+ if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
+ delta = 0;
/*
- * If this is the first commit on the page, then it has the same
- * timestamp as the page itself.
+ * If we need to add a timestamp, then we
+	 * add it to the start of the reserved space.
*/
- if (!tail)
+ if (unlikely(info->add_timestamp)) {
+ event = rb_add_time_stamp(event, delta);
+ length -= RB_LEN_TIME_EXTEND;
delta = 0;
+ }
- /* See if we shot pass the end of this buffer page */
- if (unlikely(write > BUF_PAGE_SIZE))
- return rb_move_tail(cpu_buffer, length, tail,
- tail_page, ts);
+ event->time_delta = delta;
+ length -= RB_EVNT_HDR_SIZE;
+ if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
+ event->type_len = 0;
+ event->array[0] = length;
+ } else
+ event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
+}
- /* We reserved something on the buffer */
+static unsigned rb_calculate_event_length(unsigned length)
+{
+ struct ring_buffer_event event; /* Used only for sizeof array */
- event = __rb_page_index(tail_page, tail);
- kmemcheck_annotate_bitfield(event, bitfield);
- rb_update_event(cpu_buffer, event, length, add_timestamp, delta);
+ /* zero length can cause confusions */
+ if (!length)
+ length++;
- local_inc(&tail_page->entries);
+ if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
+ length += sizeof(event.array[0]);
+
+ length += RB_EVNT_HDR_SIZE;
+ length = ALIGN(length, RB_ARCH_ALIGNMENT);
/*
- * If this is the first commit on the page, then update
- * its timestamp.
+ * In case the time delta is larger than the 27 bits for it
+ * in the header, we need to add a timestamp. If another
+ * event comes in when trying to discard this one to increase
+ * the length, then the timestamp will be added in the allocated
+ * space of this event. If length is bigger than the size needed
+ * for the TIME_EXTEND, then padding has to be used. The events
+ * length must be either RB_LEN_TIME_EXTEND, or greater than or equal
+ * to RB_LEN_TIME_EXTEND + 8, as 8 is the minimum size for padding.
+ * As length is a multiple of 4, we only need to worry if it
+ * is 12 (RB_LEN_TIME_EXTEND + 4).
*/
- if (!tail)
- tail_page->page->time_stamp = ts;
+ if (length == RB_LEN_TIME_EXTEND + RB_ALIGNMENT)
+ length += RB_ALIGNMENT;
- /* account for these added bytes */
- local_add(length, &cpu_buffer->entries_bytes);
+ return length;
+}
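
A worked example of the corner case above, assuming RB_EVNT_HDR_SIZE == 4, RB_ALIGNMENT == 4 and RB_LEN_TIME_EXTEND == 8 (the values when 8-byte alignment is not forced):

/* Sketch: a 7-byte payload becomes 7 + 4 = 11, aligns to 12, which
 * is RB_LEN_TIME_EXTEND + 4 and could not later be rewritten as a
 * TIME_EXTEND plus minimum-size padding, so it is bumped to 16.
 * A 1-byte payload becomes 5, aligns to 8, and stays there.
 */
static void rb_length_example(void)
{
	WARN_ON(rb_calculate_event_length(1) != 8);
	WARN_ON(rb_calculate_event_length(7) != 16);
}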
- return event;
+#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+static inline bool sched_clock_stable(void)
+{
+ return true;
}
+#endif
static inline int
rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
@@ -2483,6 +2400,59 @@ static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
local_inc(&cpu_buffer->commits);
}
+static void
+rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ unsigned long max_count;
+
+ /*
+ * We only race with interrupts and NMIs on this CPU.
+ * If we own the commit event, then we can commit
+	 * all others that interrupted us, since interrupts nest
+	 * like a stack (each finishes before returning to us).
+	 * This allows us to do a simple loop to
+ * assign the commit to the tail.
+ */
+ again:
+ max_count = cpu_buffer->nr_pages * 100;
+
+ while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
+ if (RB_WARN_ON(cpu_buffer, !(--max_count)))
+ return;
+ if (RB_WARN_ON(cpu_buffer,
+ rb_is_reader_page(cpu_buffer->tail_page)))
+ return;
+ local_set(&cpu_buffer->commit_page->page->commit,
+ rb_page_write(cpu_buffer->commit_page));
+ rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
+ cpu_buffer->write_stamp =
+ cpu_buffer->commit_page->page->time_stamp;
+ /* add barrier to keep gcc from optimizing too much */
+ barrier();
+ }
+ while (rb_commit_index(cpu_buffer) !=
+ rb_page_write(cpu_buffer->commit_page)) {
+
+ local_set(&cpu_buffer->commit_page->page->commit,
+ rb_page_write(cpu_buffer->commit_page));
+ RB_WARN_ON(cpu_buffer,
+ local_read(&cpu_buffer->commit_page->page->commit) &
+ ~RB_WRITE_MASK);
+ barrier();
+ }
+
+ /* again, keep gcc from optimizing */
+ barrier();
+
+ /*
+ * If an interrupt came in just after the first while loop
+ * and pushed the tail page forward, we will be left with
+ * a dangling commit that will never go forward.
+ */
+ if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page))
+ goto again;
+}
+
static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
{
unsigned long commits;
@@ -2515,91 +2485,94 @@ static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
}
}
-static struct ring_buffer_event *
-rb_reserve_next_event(struct ring_buffer *buffer,
- struct ring_buffer_per_cpu *cpu_buffer,
- unsigned long length)
+static inline void rb_event_discard(struct ring_buffer_event *event)
{
- struct ring_buffer_event *event;
- u64 ts, delta;
- int nr_loops = 0;
- int add_timestamp;
- u64 diff;
+ if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+ event = skip_time_extend(event);
- rb_start_commit(cpu_buffer);
+ /* array[0] holds the actual length for the discarded event */
+ event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
+ event->type_len = RINGBUF_TYPE_PADDING;
+	/* time delta must be non-zero */
+ if (!event->time_delta)
+ event->time_delta = 1;
+}
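
On the read side, a padding event with a non-zero time_delta carries the discarded size in array[0], which lets readers step over the hole; a zero time_delta instead marks end-of-page padding with no explicit length. A minimal sketch under those assumptions:

/* Sketch: the space a reader attributes to a discarded event,
 * mirroring what rb_event_discard() stored above.
 */
static unsigned rb_discarded_length(struct ring_buffer_event *event)
{
	if (event->type_len == RINGBUF_TYPE_PADDING && event->time_delta)
		return event->array[0] + RB_EVNT_HDR_SIZE;
	return 0;	/* end-of-page padding: no explicit length */
}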
-#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
- /*
- * Due to the ability to swap a cpu buffer from a buffer
- * it is possible it was swapped before we committed.
- * (committing stops a swap). We check for it here and
- * if it happened, we have to fail the write.
- */
- barrier();
- if (unlikely(ACCESS_ONCE(cpu_buffer->buffer) != buffer)) {
- local_dec(&cpu_buffer->committing);
- local_dec(&cpu_buffer->commits);
- return NULL;
- }
-#endif
+static inline int
+rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
+ struct ring_buffer_event *event)
+{
+ unsigned long addr = (unsigned long)event;
+ unsigned long index;
- length = rb_calculate_event_length(length);
- again:
- add_timestamp = 0;
- delta = 0;
+ index = rb_event_index(event);
+ addr &= PAGE_MASK;
+
+ return cpu_buffer->commit_page->page == (void *)addr &&
+ rb_commit_index(cpu_buffer) == index;
+}
+
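
The masking works because buffer pages are page-aligned, so rounding an event's address down with PAGE_MASK yields the page that contains it. The same invariant, written out as a sketch (the helper name is hypothetical):

/* Sketch: an event lies on a given buffer page iff its address,
 * masked to a page boundary, equals that page's data pointer.
 */
static bool rb_event_on_page(struct ring_buffer_event *event,
			     struct buffer_page *bpage)
{
	return ((unsigned long)event & PAGE_MASK) ==
	       (unsigned long)bpage->page;
}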
+static void
+rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
+ struct ring_buffer_event *event)
+{
+ u64 delta;
/*
- * We allow for interrupts to reenter here and do a trace.
- * If one does, it will cause this original code to loop
- * back here. Even with heavy interrupts happening, this
- * should only happen a few times in a row. If this happens
- * 1000 times in a row, there must be either an interrupt
- * storm or we have something buggy.
- * Bail!
+	 * Only the event that is first in the commit queue
+	 * updates the write timestamp.
*/
- if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
- goto out_fail;
+ if (rb_event_is_commit(cpu_buffer, event)) {
+ /*
+ * A commit event that is first on a page
+		 * updates the write timestamp with the page's timestamp
+ */
+ if (!rb_event_index(event))
+ cpu_buffer->write_stamp =
+ cpu_buffer->commit_page->page->time_stamp;
+ else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
+ delta = event->array[0];
+ delta <<= TS_SHIFT;
+ delta += event->time_delta;
+ cpu_buffer->write_stamp += delta;
+ } else
+ cpu_buffer->write_stamp += event->time_delta;
+ }
+}
- ts = rb_time_stamp(cpu_buffer->buffer);
- diff = ts - cpu_buffer->write_stamp;
+static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
+ struct ring_buffer_event *event)
+{
+ local_inc(&cpu_buffer->entries);
+ rb_update_write_stamp(cpu_buffer, event);
+ rb_end_commit(cpu_buffer);
+}
- /* make sure this diff is calculated here */
- barrier();
+static __always_inline void
+rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
+{
+ bool pagebusy;
- /* Did the write stamp get updated already? */
- if (likely(ts >= cpu_buffer->write_stamp)) {
- delta = diff;
- if (unlikely(test_time_stamp(delta))) {
- int local_clock_stable = 1;
-#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
- local_clock_stable = sched_clock_stable();
-#endif
- WARN_ONCE(delta > (1ULL << 59),
- KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s",
- (unsigned long long)delta,
- (unsigned long long)ts,
- (unsigned long long)cpu_buffer->write_stamp,
- local_clock_stable ? "" :
- "If you just came from a suspend/resume,\n"
- "please switch to the trace global clock:\n"
- " echo global > /sys/kernel/debug/tracing/trace_clock\n");
- add_timestamp = 1;
- }
+ if (buffer->irq_work.waiters_pending) {
+ buffer->irq_work.waiters_pending = false;
+		/* irq_work_queue() supplies its own memory barriers */
+ irq_work_queue(&buffer->irq_work.work);
}
- event = __rb_reserve_next(cpu_buffer, length, ts,
- delta, add_timestamp);
- if (unlikely(PTR_ERR(event) == -EAGAIN))
- goto again;
-
- if (!event)
- goto out_fail;
+ if (cpu_buffer->irq_work.waiters_pending) {
+ cpu_buffer->irq_work.waiters_pending = false;
+		/* irq_work_queue() supplies its own memory barriers */
+ irq_work_queue(&cpu_buffer->irq_work.work);
+ }
- return event;
+ pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
- out_fail:
- rb_end_commit(cpu_buffer);
- return NULL;
+ if (!pagebusy && cpu_buffer->irq_work.full_waiters_pending) {
+ cpu_buffer->irq_work.wakeup_full = true;
+ cpu_buffer->irq_work.full_waiters_pending = false;
+		/* irq_work_queue() supplies its own memory barriers */
+ irq_work_queue(&cpu_buffer->irq_work.work);
+ }
}
/*
@@ -2672,6 +2645,178 @@ trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
}
/**
+ * ring_buffer_unlock_commit - commit a reserved event
+ * @buffer: The buffer to commit to
+ * @event: The event pointer to commit.
+ *
+ * This commits the data to the ring buffer, and releases any locks held.
+ *
+ * Must be paired with ring_buffer_lock_reserve.
+ */
+int ring_buffer_unlock_commit(struct ring_buffer *buffer,
+ struct ring_buffer_event *event)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ int cpu = raw_smp_processor_id();
+
+ cpu_buffer = buffer->buffers[cpu];
+
+ rb_commit(cpu_buffer, event);
+
+ rb_wakeups(buffer, cpu_buffer);
+
+ trace_recursive_unlock(cpu_buffer);
+
+ preempt_enable_notrace();
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
+
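
For context, the reserve/commit pairing that this function completes looks roughly as follows from the writer's side (a sketch; struct my_entry and my_write() are hypothetical):

/* Sketch: a minimal writer pairing ring_buffer_lock_reserve() with
 * ring_buffer_unlock_commit().
 */
static int my_write(struct ring_buffer *buffer, int value)
{
	struct my_entry { int value; } *entry;
	struct ring_buffer_event *event;

	event = ring_buffer_lock_reserve(buffer, sizeof(*entry));
	if (!event)
		return -EBUSY;

	entry = ring_buffer_event_data(event);
	entry->value = value;

	return ring_buffer_unlock_commit(buffer, event);
}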
+static noinline void
+rb_handle_timestamp(struct ring_buffer_per_cpu *cpu_buffer,
+ struct rb_event_info *info)
+{
+ WARN_ONCE(info->delta > (1ULL << 59),
+ KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s",
+ (unsigned long long)info->delta,
+ (unsigned long long)info->ts,
+ (unsigned long long)cpu_buffer->write_stamp,
+ sched_clock_stable() ? "" :
+ "If you just came from a suspend/resume,\n"
+ "please switch to the trace global clock:\n"
+ " echo global > /sys/kernel/debug/tracing/trace_clock\n");
+ info->add_timestamp = 1;
+}
+
+static struct ring_buffer_event *
+__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
+ struct rb_event_info *info)
+{
+ struct ring_buffer_event *event;
+ struct buffer_page *tail_page;
+ unsigned long tail, write;
+
+ /*
+ * If the time delta since the last event is too big to
+ * hold in the time field of the event, then we append a
+ * TIME EXTEND event ahead of the data event.
+ */
+ if (unlikely(info->add_timestamp))
+ info->length += RB_LEN_TIME_EXTEND;
+
+ tail_page = info->tail_page = cpu_buffer->tail_page;
+ write = local_add_return(info->length, &tail_page->write);
+
+ /* set write to only the index of the write */
+ write &= RB_WRITE_MASK;
+ tail = write - info->length;
+
+ /*
+ * If this is the first commit on the page, then it has the same
+ * timestamp as the page itself.
+ */
+ if (!tail)
+ info->delta = 0;
+
+	/* See if we shot past the end of this buffer page */
+ if (unlikely(write > BUF_PAGE_SIZE))
+ return rb_move_tail(cpu_buffer, tail, info);
+
+ /* We reserved something on the buffer */
+
+ event = __rb_page_index(tail_page, tail);
+ kmemcheck_annotate_bitfield(event, bitfield);
+ rb_update_event(cpu_buffer, event, info);
+
+ local_inc(&tail_page->entries);
+
+ /*
+ * If this is the first commit on the page, then update
+ * its timestamp.
+ */
+ if (!tail)
+ tail_page->page->time_stamp = info->ts;
+
+ /* account for these added bytes */
+ local_add(info->length, &cpu_buffer->entries_bytes);
+
+ return event;
+}
+
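
The lock-free reservation above hinges on local_add_return(): every writer, including one that interrupts another mid-reserve, atomically advances the write index and owns the disjoint [tail, write) range handed back to it. A sketch of the index math, assuming RB_WRITE_MASK as defined in this file:

/* Sketch: nested writers on one CPU each claim a private region,
 * because local_add_return() returns distinct post-add values.
 */
static unsigned long rb_claim_region(struct buffer_page *tail_page,
				     unsigned long length)
{
	unsigned long write;

	write = local_add_return(length, &tail_page->write);
	write &= RB_WRITE_MASK;	/* keep only the write index */
	return write - length;	/* tail: start of our region */
}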
+static struct ring_buffer_event *
+rb_reserve_next_event(struct ring_buffer *buffer,
+ struct ring_buffer_per_cpu *cpu_buffer,
+ unsigned long length)
+{
+ struct ring_buffer_event *event;
+ struct rb_event_info info;
+ int nr_loops = 0;
+ u64 diff;
+
+ rb_start_commit(cpu_buffer);
+
+#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
+ /*
+	 * Because a cpu buffer can be swapped out of its ring buffer,
+	 * it is possible the swap happened before we committed
+	 * (committing stops a swap). We check for that here, and
+	 * if it happened, we have to fail the write.
+ */
+ barrier();
+ if (unlikely(ACCESS_ONCE(cpu_buffer->buffer) != buffer)) {
+ local_dec(&cpu_buffer->committing);
+ local_dec(&cpu_buffer->commits);
+ return NULL;
+ }
+#endif
+
+ info.length = rb_calculate_event_length(length);
+ again:
+ info.add_timestamp = 0;
+ info.delta = 0;
+
+ /*
+ * We allow for interrupts to reenter here and do a trace.
+ * If one does, it will cause this original code to loop
+ * back here. Even with heavy interrupts happening, this
+ * should only happen a few times in a row. If this happens
+ * 1000 times in a row, there must be either an interrupt
+	 * storm or a bug somewhere. Bail!
+ */
+ if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
+ goto out_fail;
+
+ info.ts = rb_time_stamp(cpu_buffer->buffer);
+ diff = info.ts - cpu_buffer->write_stamp;
+
+ /* make sure this diff is calculated here */
+ barrier();
+
+ /* Did the write stamp get updated already? */
+ if (likely(info.ts >= cpu_buffer->write_stamp)) {
+ info.delta = diff;
+ if (unlikely(test_time_stamp(info.delta)))
+ rb_handle_timestamp(cpu_buffer, &info);
+ }
+
+ event = __rb_reserve_next(cpu_buffer, &info);
+
+ if (unlikely(PTR_ERR(event) == -EAGAIN))
+ goto again;
+
+ if (!event)
+ goto out_fail;
+
+ return event;
+
+ out_fail:
+ rb_end_commit(cpu_buffer);
+ return NULL;
+}
+
+/**
* ring_buffer_lock_reserve - reserve a part of the buffer
* @buffer: the ring buffer to reserve from
* @length: the length of the data to reserve (excluding event header)
@@ -2729,111 +2874,6 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
}
EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
-static void
-rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
- struct ring_buffer_event *event)
-{
- u64 delta;
-
- /*
- * The event first in the commit queue updates the
- * time stamp.
- */
- if (rb_event_is_commit(cpu_buffer, event)) {
- /*
- * A commit event that is first on a page
- * updates the write timestamp with the page stamp
- */
- if (!rb_event_index(event))
- cpu_buffer->write_stamp =
- cpu_buffer->commit_page->page->time_stamp;
- else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
- delta = event->array[0];
- delta <<= TS_SHIFT;
- delta += event->time_delta;
- cpu_buffer->write_stamp += delta;
- } else
- cpu_buffer->write_stamp += event->time_delta;
- }
-}
-
-static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
- struct ring_buffer_event *event)
-{
- local_inc(&cpu_buffer->entries);
- rb_update_write_stamp(cpu_buffer, event);
- rb_end_commit(cpu_buffer);
-}
-
-static __always_inline void
-rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
-{
- bool pagebusy;
-
- if (buffer->irq_work.waiters_pending) {
- buffer->irq_work.waiters_pending = false;
- /* irq_work_queue() supplies it's own memory barriers */
- irq_work_queue(&buffer->irq_work.work);
- }
-
- if (cpu_buffer->irq_work.waiters_pending) {
- cpu_buffer->irq_work.waiters_pending = false;
- /* irq_work_queue() supplies it's own memory barriers */
- irq_work_queue(&cpu_buffer->irq_work.work);
- }
-
- pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
-
- if (!pagebusy && cpu_buffer->irq_work.full_waiters_pending) {
- cpu_buffer->irq_work.wakeup_full = true;
- cpu_buffer->irq_work.full_waiters_pending = false;
- /* irq_work_queue() supplies it's own memory barriers */
- irq_work_queue(&cpu_buffer->irq_work.work);
- }
-}
-
-/**
- * ring_buffer_unlock_commit - commit a reserved
- * @buffer: The buffer to commit to
- * @event: The event pointer to commit.
- *
- * This commits the data to the ring buffer, and releases any locks held.
- *
- * Must be paired with ring_buffer_lock_reserve.
- */
-int ring_buffer_unlock_commit(struct ring_buffer *buffer,
- struct ring_buffer_event *event)
-{
- struct ring_buffer_per_cpu *cpu_buffer;
- int cpu = raw_smp_processor_id();
-
- cpu_buffer = buffer->buffers[cpu];
-
- rb_commit(cpu_buffer, event);
-
- rb_wakeups(buffer, cpu_buffer);
-
- trace_recursive_unlock(cpu_buffer);
-
- preempt_enable_notrace();
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
-
-static inline void rb_event_discard(struct ring_buffer_event *event)
-{
- if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
- event = skip_time_extend(event);
-
- /* array[0] holds the actual length for the discarded event */
- event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
- event->type_len = RINGBUF_TYPE_PADDING;
- /* time delta must be non zero */
- if (!event->time_delta)
- event->time_delta = 1;
-}
-
/*
* Decrement the entries to the page that an event is on.
* The event does not even need to exist, only the pointer
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index abcbf7ff8743..6e79408674aa 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3035,7 +3035,7 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot)
if (!iter)
return ERR_PTR(-ENOMEM);
- iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
+ iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
GFP_KERNEL);
if (!iter->buffer_iter)
goto release;
@@ -6990,7 +6990,7 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
trace_init_global_iter(&iter);
for_each_tracing_cpu(cpu) {
- atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled);
+ atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
}
old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 404a372ad85a..7ca09cdc20c2 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -30,6 +30,7 @@
DEFINE_MUTEX(event_mutex);
LIST_HEAD(ftrace_events);
+static LIST_HEAD(ftrace_generic_fields);
static LIST_HEAD(ftrace_common_fields);
#define GFP_TRACE (GFP_KERNEL | __GFP_ZERO)
@@ -94,6 +95,10 @@ trace_find_event_field(struct trace_event_call *call, char *name)
struct ftrace_event_field *field;
struct list_head *head;
+ field = __find_event_field(&ftrace_generic_fields, name);
+ if (field)
+ return field;
+
field = __find_event_field(&ftrace_common_fields, name);
if (field)
return field;
@@ -144,6 +149,13 @@ int trace_define_field(struct trace_event_call *call, const char *type,
}
EXPORT_SYMBOL_GPL(trace_define_field);
+#define __generic_field(type, item, filter_type) \
+ ret = __trace_define_field(&ftrace_generic_fields, #type, \
+ #item, 0, 0, is_signed_type(type), \
+ filter_type); \
+ if (ret) \
+ return ret;
+
#define __common_field(type, item) \
ret = __trace_define_field(&ftrace_common_fields, #type, \
"common_" #item, \
@@ -153,6 +165,16 @@ EXPORT_SYMBOL_GPL(trace_define_field);
if (ret) \
return ret;
+static int trace_define_generic_fields(void)
+{
+ int ret;
+
+ __generic_field(int, cpu, FILTER_OTHER);
+ __generic_field(char *, comm, FILTER_PTR_STRING);
+
+ return ret;
+}
+
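
With these registered, "cpu" and "comm" can be used in any event's filter even though no event defines such fields. A hedged userspace sketch (the tracefs path and the sched_switch event are assumptions for illustration):

/* Sketch: set a filter that uses the new generic fields. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/debug/tracing/events/sched/"
			"sched_switch/filter", "w");

	if (!f)
		return 1;
	/* match only events raised on CPU 0 by a task named "bash" */
	fputs("cpu == 0 && comm == \"bash\"", f);
	return fclose(f) ? 1 : 0;
}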
static int trace_define_common_fields(void)
{
int ret;
@@ -2671,6 +2693,9 @@ static __init int event_trace_init(void)
if (!entry)
pr_warn("Could not create tracefs 'available_events' entry\n");
+ if (trace_define_generic_fields())
+ pr_warn("tracing: Failed to allocated generic fields");
+
if (trace_define_common_fields())
pr_warn("tracing: Failed to allocate common fields");
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index d81d6f302b14..bd1bf184c5c9 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -252,6 +252,50 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event)
return match;
}
+/* Filter predicate for CPUs. */
+static int filter_pred_cpu(struct filter_pred *pred, void *event)
+{
+ int cpu, cmp;
+ int match = 0;
+
+ cpu = raw_smp_processor_id();
+ cmp = pred->val;
+
+ switch (pred->op) {
+ case OP_EQ:
+ match = cpu == cmp;
+ break;
+ case OP_LT:
+ match = cpu < cmp;
+ break;
+ case OP_LE:
+ match = cpu <= cmp;
+ break;
+ case OP_GT:
+ match = cpu > cmp;
+ break;
+ case OP_GE:
+ match = cpu >= cmp;
+ break;
+ default:
+ break;
+ }
+
+ return !!match == !pred->not;
+}
+
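
The return statement normalizes both sides to booleans: with pred->not clear it reports whether the comparison matched, with pred->not set it reports the inverse. Assuming pred->not only ever holds 0 or 1 here, the expression is equivalent to an XOR, as this sketch spells out:

/* Sketch: "!!match == !not" truth table, with not in {0, 1}:
 *
 *   match  not  result
 *     0     0     0
 *     1     0     1
 *     0     1     1
 *     1     1     0
 *
 * i.e. the same as (!!match ^ !!not), which matches the
 * "cmp ^ pred->not" form used by filter_pred_comm() below.
 */
static int filter_not_example(int match, int not)
{
	return !!match == !not;
}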
+/* Filter predicate for COMM. */
+static int filter_pred_comm(struct filter_pred *pred, void *event)
+{
+ int cmp, match;
+
+ cmp = pred->regex.match(current->comm, &pred->regex,
+ pred->regex.field_len);
+ match = cmp ^ pred->not;
+
+ return match;
+}
+
static int filter_pred_none(struct filter_pred *pred, void *event)
{
return 0;
@@ -1002,7 +1046,10 @@ static int init_pred(struct filter_parse_state *ps,
if (is_string_field(field)) {
filter_build_regex(pred);
- if (field->filter_type == FILTER_STATIC_STRING) {
+ if (!strcmp(field->name, "comm")) {
+ fn = filter_pred_comm;
+ pred->regex.field_len = TASK_COMM_LEN;
+ } else if (field->filter_type == FILTER_STATIC_STRING) {
fn = filter_pred_string;
pred->regex.field_len = field->size;
} else if (field->filter_type == FILTER_DYN_STRING)
@@ -1025,7 +1072,10 @@ static int init_pred(struct filter_parse_state *ps,
}
pred->val = val;
- fn = select_comparison_fn(pred->op, field->size,
+ if (!strcmp(field->name, "cpu"))
+ fn = filter_pred_cpu;
+ else
+ fn = select_comparison_fn(pred->op, field->size,
field->is_signed);
if (!fn) {
parse_error(ps, FILT_ERR_INVALID_OP, 0);
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 8968bf720c12..ca98445782ac 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -715,13 +715,13 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)
snprintf(nsecs_str, slen, "%03lu", nsecs_rem);
trace_seq_printf(s, ".%s", nsecs_str);
- len += strlen(nsecs_str);
+ len += strlen(nsecs_str) + 1;
}
trace_seq_puts(s, " us ");
/* Print remaining spaces to fit the row's width */
- for (i = len; i < 7; i++)
+ for (i = len; i < 8; i++)
trace_seq_putc(s, ' ');
}
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index dfab253727dc..8e481a84aeea 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -496,6 +496,8 @@ static const struct trace_mark {
char sym;
} mark[] = {
MARK(1000000000ULL , '$'), /* 1 sec */
+ MARK(100000000ULL , '@'), /* 100 msec */
+ MARK(10000000ULL , '*'), /* 10 msec */
MARK(1000000ULL , '#'), /* 1000 usecs */
MARK(100000ULL , '!'), /* 100 usecs */
MARK(10000ULL , '+'), /* 10 usecs */
@@ -508,7 +510,7 @@ char trace_find_mark(unsigned long long d)
int size = ARRAY_SIZE(mark);
for (i = 0; i < size; i++) {
- if (d >= mark[i].val)
+ if (d > mark[i].val)
break;
}
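
With the two added marks, delays are flagged at every decade from 10 usecs up to 1 sec; a 150 ms duration, for example, is now annotated with '@' instead of falling through to '#'. A small sketch of the lookup, assuming the table above:

/* Sketch: trace_find_mark() scans from the largest threshold down
 * and picks the first mark the duration (in ns) exceeds:
 *
 *   1500000000 (1.5 s)  -> '$'
 *    150000000 (150 ms) -> '@'
 *     15000000 (15 ms)  -> '*'
 *      1500000 (1.5 ms) -> '#'
 */
static char mark_example(void)
{
	return trace_find_mark(150000000ULL);	/* 150 ms -> '@' */
}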
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 3f34496244e9..b746399ab59c 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -18,12 +18,6 @@
#define STACK_TRACE_ENTRIES 500
-#ifdef CC_USING_FENTRY
-# define fentry 1
-#else
-# define fentry 0
-#endif
-
static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] =
{ [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX };
static unsigned stack_dump_index[STACK_TRACE_ENTRIES];
@@ -35,7 +29,7 @@ static unsigned stack_dump_index[STACK_TRACE_ENTRIES];
*/
static struct stack_trace max_stack_trace = {
.max_entries = STACK_TRACE_ENTRIES - 1,
- .entries = &stack_dump_trace[1],
+ .entries = &stack_dump_trace[0],
};
static unsigned long max_stack_size;
@@ -55,7 +49,7 @@ static inline void print_max_stack(void)
pr_emerg(" Depth Size Location (%d entries)\n"
" ----- ---- --------\n",
- max_stack_trace.nr_entries - 1);
+ max_stack_trace.nr_entries);
for (i = 0; i < max_stack_trace.nr_entries; i++) {
if (stack_dump_trace[i] == ULONG_MAX)
@@ -77,7 +71,7 @@ check_stack(unsigned long ip, unsigned long *stack)
unsigned long this_size, flags; unsigned long *p, *top, *start;
static int tracer_frame;
int frame_size = ACCESS_ONCE(tracer_frame);
- int i;
+ int i, x;
this_size = ((unsigned long)stack) & (THREAD_SIZE-1);
this_size = THREAD_SIZE - this_size;
@@ -105,26 +99,20 @@ check_stack(unsigned long ip, unsigned long *stack)
max_stack_size = this_size;
max_stack_trace.nr_entries = 0;
-
- if (using_ftrace_ops_list_func())
- max_stack_trace.skip = 4;
- else
- max_stack_trace.skip = 3;
+ max_stack_trace.skip = 3;
save_stack_trace(&max_stack_trace);
- /*
- * Add the passed in ip from the function tracer.
- * Searching for this on the stack will skip over
- * most of the overhead from the stack tracer itself.
- */
- stack_dump_trace[0] = ip;
- max_stack_trace.nr_entries++;
+ /* Skip over the overhead of the stack tracer itself */
+ for (i = 0; i < max_stack_trace.nr_entries; i++) {
+ if (stack_dump_trace[i] == ip)
+ break;
+ }
/*
* Now find where in the stack these are.
*/
- i = 0;
+ x = 0;
start = stack;
top = (unsigned long *)
(((unsigned long)start & ~(THREAD_SIZE-1)) + THREAD_SIZE);
@@ -139,12 +127,15 @@ check_stack(unsigned long ip, unsigned long *stack)
while (i < max_stack_trace.nr_entries) {
int found = 0;
- stack_dump_index[i] = this_size;
+ stack_dump_index[x] = this_size;
p = start;
for (; p < top && i < max_stack_trace.nr_entries; p++) {
+ if (stack_dump_trace[i] == ULONG_MAX)
+ break;
if (*p == stack_dump_trace[i]) {
- this_size = stack_dump_index[i++] =
+ stack_dump_trace[x] = stack_dump_trace[i++];
+ this_size = stack_dump_index[x++] =
(top - p) * sizeof(unsigned long);
found = 1;
/* Start the search from here */
@@ -156,7 +147,7 @@ check_stack(unsigned long ip, unsigned long *stack)
* out what that is, then figure it out
* now.
*/
- if (unlikely(!tracer_frame) && i == 1) {
+ if (unlikely(!tracer_frame)) {
tracer_frame = (p - stack) *
sizeof(unsigned long);
max_stack_size -= tracer_frame;
@@ -168,6 +159,10 @@ check_stack(unsigned long ip, unsigned long *stack)
i++;
}
+ max_stack_trace.nr_entries = x;
+ for (; x < i; x++)
+ stack_dump_trace[x] = ULONG_MAX;
+
if (task_stack_end_corrupted(current)) {
print_max_stack();
BUG();
@@ -192,24 +187,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip,
if (per_cpu(trace_active, cpu)++ != 0)
goto out;
- /*
- * When fentry is used, the traced function does not get
- * its stack frame set up, and we lose the parent.
- * The ip is pretty useless because the function tracer
- * was called before that function set up its stack frame.
- * In this case, we use the parent ip.
- *
- * By adding the return address of either the parent ip
- * or the current ip we can disregard most of the stack usage
- * caused by the stack tracer itself.
- *
- * The function tracer always reports the address of where the
- * mcount call was, but the stack will hold the return address.
- */
- if (fentry)
- ip = parent_ip;
- else
- ip += MCOUNT_INSN_SIZE;
+ ip += MCOUNT_INSN_SIZE;
check_stack(ip, &stack);
@@ -284,7 +262,7 @@ __next(struct seq_file *m, loff_t *pos)
{
long n = *pos - 1;
- if (n >= max_stack_trace.nr_entries || stack_dump_trace[n] == ULONG_MAX)
+ if (n > max_stack_trace.nr_entries || stack_dump_trace[n] == ULONG_MAX)
return NULL;
m->private = (void *)n;
@@ -354,7 +332,7 @@ static int t_show(struct seq_file *m, void *v)
seq_printf(m, " Depth Size Location"
" (%d entries)\n"
" ----- ---- --------\n",
- max_stack_trace.nr_entries - 1);
+ max_stack_trace.nr_entries);
if (!stack_tracer_enabled && !max_stack_size)
print_disabled(m);