aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile4
-rw-r--r--kernel/acct.c2
-rw-r--r--kernel/bpf/Makefile3
-rw-r--r--kernel/bpf/arraymap.c6
-rw-r--r--kernel/bpf/core.c8
-rw-r--r--kernel/bpf/hashtab.c6
-rw-r--r--kernel/bpf/helpers.c30
-rw-r--r--kernel/bpf/syscall.c11
-rw-r--r--kernel/bpf/test_stub.c78
-rw-r--r--kernel/bpf/verifier.c177
-rw-r--r--kernel/capability.c35
-rw-r--r--kernel/cgroup.c6
-rw-r--r--kernel/cred.c3
-rw-r--r--kernel/events/core.c2
-rw-r--r--kernel/exec_domain.c137
-rw-r--r--kernel/exit.c2
-rw-r--r--kernel/fork.c145
-rw-r--r--kernel/gcov/base.c5
-rw-r--r--kernel/groups.c3
-rw-r--r--kernel/hung_task.c4
-rw-r--r--kernel/pid.c15
-rw-r--r--kernel/ptrace.c39
-rw-r--r--kernel/reboot.c53
-rw-r--r--kernel/resource.c32
-rw-r--r--kernel/signal.c14
-rw-r--r--kernel/sys.c49
-rw-r--r--kernel/sys_ni.c14
-rw-r--r--kernel/sysctl.c25
-rw-r--r--kernel/trace/trace_stack.c4
29 files changed, 505 insertions, 407 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 1408b3353a3c..0f8f8b0bc1bf 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -9,7 +9,9 @@ obj-y = fork.o exec_domain.o panic.o \
extable.o params.o \
kthread.o sys_ni.o nsproxy.o \
notifier.o ksysfs.o cred.o reboot.o \
- async.o range.o groups.o smpboot.o
+ async.o range.o smpboot.o
+
+obj-$(CONFIG_MULTIUSER) += groups.o
ifdef CONFIG_FUNCTION_TRACER
# Do not trace debug files and internal ftrace files
diff --git a/kernel/acct.c b/kernel/acct.c
index e6c10d1a4058..74963d192c5d 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -213,7 +213,7 @@ static int acct_on(struct filename *pathname)
return -EACCES;
}
- if (!file->f_op->write) {
+ if (!(file->f_mode & FMODE_CAN_WRITE)) {
kfree(acct);
filp_close(file, NULL);
return -EIO;
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index a5ae60f0b0a2..e6983be12bd3 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -1,5 +1,2 @@
obj-y := core.o
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o hashtab.o arraymap.o helpers.o
-ifdef CONFIG_TEST_BPF
-obj-$(CONFIG_BPF_SYSCALL) += test_stub.o
-endif
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 9eb4d8a7cd87..8a6616583f38 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -134,7 +134,7 @@ static void array_map_free(struct bpf_map *map)
kvfree(array);
}
-static struct bpf_map_ops array_ops = {
+static const struct bpf_map_ops array_ops = {
.map_alloc = array_map_alloc,
.map_free = array_map_free,
.map_get_next_key = array_map_get_next_key,
@@ -143,14 +143,14 @@ static struct bpf_map_ops array_ops = {
.map_delete_elem = array_map_delete_elem,
};
-static struct bpf_map_type_list tl = {
+static struct bpf_map_type_list array_type __read_mostly = {
.ops = &array_ops,
.type = BPF_MAP_TYPE_ARRAY,
};
static int __init register_array_map(void)
{
- bpf_register_map_type(&tl);
+ bpf_register_map_type(&array_type);
return 0;
}
late_initcall(register_array_map);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index a64e7a207d2b..4139a0f8b558 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -656,6 +656,14 @@ void bpf_prog_free(struct bpf_prog *fp)
}
EXPORT_SYMBOL_GPL(bpf_prog_free);
+/* Weak definitions of helper functions in case we don't have bpf syscall. */
+const struct bpf_func_proto bpf_map_lookup_elem_proto __weak;
+const struct bpf_func_proto bpf_map_update_elem_proto __weak;
+const struct bpf_func_proto bpf_map_delete_elem_proto __weak;
+
+const struct bpf_func_proto bpf_get_prandom_u32_proto __weak;
+const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
+
/* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call
* skb_copy_bits(), so provide a weak definition of it for NET-less config.
*/
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index b3ba43674310..83c209d9b17a 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -345,7 +345,7 @@ static void htab_map_free(struct bpf_map *map)
kfree(htab);
}
-static struct bpf_map_ops htab_ops = {
+static const struct bpf_map_ops htab_ops = {
.map_alloc = htab_map_alloc,
.map_free = htab_map_free,
.map_get_next_key = htab_map_get_next_key,
@@ -354,14 +354,14 @@ static struct bpf_map_ops htab_ops = {
.map_delete_elem = htab_map_delete_elem,
};
-static struct bpf_map_type_list tl = {
+static struct bpf_map_type_list htab_type __read_mostly = {
.ops = &htab_ops,
.type = BPF_MAP_TYPE_HASH,
};
static int __init register_htab_map(void)
{
- bpf_register_map_type(&tl);
+ bpf_register_map_type(&htab_type);
return 0;
}
late_initcall(register_htab_map);
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 9e3414d85459..bd7f5988ed9c 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -11,6 +11,8 @@
*/
#include <linux/bpf.h>
#include <linux/rcupdate.h>
+#include <linux/random.h>
+#include <linux/smp.h>
/* If kernel subsystem is allowing eBPF programs to call this function,
* inside its own verifier_ops->get_func_proto() callback it should return
@@ -41,7 +43,7 @@ static u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
return (unsigned long) value;
}
-struct bpf_func_proto bpf_map_lookup_elem_proto = {
+const struct bpf_func_proto bpf_map_lookup_elem_proto = {
.func = bpf_map_lookup_elem,
.gpl_only = false,
.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
@@ -60,7 +62,7 @@ static u64 bpf_map_update_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
return map->ops->map_update_elem(map, key, value, r4);
}
-struct bpf_func_proto bpf_map_update_elem_proto = {
+const struct bpf_func_proto bpf_map_update_elem_proto = {
.func = bpf_map_update_elem,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -80,10 +82,32 @@ static u64 bpf_map_delete_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
return map->ops->map_delete_elem(map, key);
}
-struct bpf_func_proto bpf_map_delete_elem_proto = {
+const struct bpf_func_proto bpf_map_delete_elem_proto = {
.func = bpf_map_delete_elem,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_MAP_KEY,
};
+
+static u64 bpf_get_prandom_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+ return prandom_u32();
+}
+
+const struct bpf_func_proto bpf_get_prandom_u32_proto = {
+ .func = bpf_get_prandom_u32,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+};
+
+static u64 bpf_get_smp_processor_id(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+ return raw_smp_processor_id();
+}
+
+const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
+ .func = bpf_get_smp_processor_id,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+};
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 504c10b990ef..3bae6c591914 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -355,10 +355,11 @@ static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
list_for_each_entry(tl, &bpf_prog_types, list_node) {
if (tl->type == type) {
prog->aux->ops = tl->ops;
- prog->aux->prog_type = type;
+ prog->type = type;
return 0;
}
}
+
return -EINVAL;
}
@@ -419,6 +420,7 @@ void bpf_prog_put(struct bpf_prog *prog)
bpf_prog_free(prog);
}
}
+EXPORT_SYMBOL_GPL(bpf_prog_put);
static int bpf_prog_release(struct inode *inode, struct file *filp)
{
@@ -466,6 +468,7 @@ struct bpf_prog *bpf_prog_get(u32 ufd)
fdput(f);
return prog;
}
+EXPORT_SYMBOL_GPL(bpf_prog_get);
/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD kern_version
@@ -513,7 +516,7 @@ static int bpf_prog_load(union bpf_attr *attr)
prog->jited = false;
atomic_set(&prog->aux->refcnt, 1);
- prog->aux->is_gpl_compatible = is_gpl;
+ prog->gpl_compatible = is_gpl;
/* find program type: socket_filter vs tracing_filter */
err = find_prog_type(type, prog);
@@ -521,8 +524,7 @@ static int bpf_prog_load(union bpf_attr *attr)
goto free_prog;
/* run eBPF verifier */
- err = bpf_check(prog, attr);
-
+ err = bpf_check(&prog, attr);
if (err < 0)
goto free_used_maps;
@@ -533,7 +535,6 @@ static int bpf_prog_load(union bpf_attr *attr)
bpf_prog_select_runtime(prog);
err = anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR | O_CLOEXEC);
-
if (err < 0)
/* failed to allocate fd */
goto free_used_maps;
diff --git a/kernel/bpf/test_stub.c b/kernel/bpf/test_stub.c
deleted file mode 100644
index 0ceae1e6e8b5..000000000000
--- a/kernel/bpf/test_stub.c
+++ /dev/null
@@ -1,78 +0,0 @@
-/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/err.h>
-#include <linux/bpf.h>
-
-/* test stubs for BPF_MAP_TYPE_UNSPEC and for BPF_PROG_TYPE_UNSPEC
- * to be used by user space verifier testsuite
- */
-struct bpf_context {
- u64 arg1;
- u64 arg2;
-};
-
-static const struct bpf_func_proto *test_func_proto(enum bpf_func_id func_id)
-{
- switch (func_id) {
- case BPF_FUNC_map_lookup_elem:
- return &bpf_map_lookup_elem_proto;
- case BPF_FUNC_map_update_elem:
- return &bpf_map_update_elem_proto;
- case BPF_FUNC_map_delete_elem:
- return &bpf_map_delete_elem_proto;
- default:
- return NULL;
- }
-}
-
-static const struct bpf_context_access {
- int size;
- enum bpf_access_type type;
-} test_ctx_access[] = {
- [offsetof(struct bpf_context, arg1)] = {
- FIELD_SIZEOF(struct bpf_context, arg1),
- BPF_READ
- },
- [offsetof(struct bpf_context, arg2)] = {
- FIELD_SIZEOF(struct bpf_context, arg2),
- BPF_READ
- },
-};
-
-static bool test_is_valid_access(int off, int size, enum bpf_access_type type)
-{
- const struct bpf_context_access *access;
-
- if (off < 0 || off >= ARRAY_SIZE(test_ctx_access))
- return false;
-
- access = &test_ctx_access[off];
- if (access->size == size && (access->type & type))
- return true;
-
- return false;
-}
-
-static struct bpf_verifier_ops test_ops = {
- .get_func_proto = test_func_proto,
- .is_valid_access = test_is_valid_access,
-};
-
-static struct bpf_prog_type_list tl_prog = {
- .ops = &test_ops,
- .type = BPF_PROG_TYPE_UNSPEC,
-};
-
-static int __init register_test_ops(void)
-{
- bpf_register_prog_type(&tl_prog);
- return 0;
-}
-late_initcall(register_test_ops);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a28e09c7825d..630a7bac1e51 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -755,7 +755,7 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
enum bpf_reg_type expected_type;
int err = 0;
- if (arg_type == ARG_ANYTHING)
+ if (arg_type == ARG_DONTCARE)
return 0;
if (reg->type == NOT_INIT) {
@@ -763,6 +763,9 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
return -EACCES;
}
+ if (arg_type == ARG_ANYTHING)
+ return 0;
+
if (arg_type == ARG_PTR_TO_STACK || arg_type == ARG_PTR_TO_MAP_KEY ||
arg_type == ARG_PTR_TO_MAP_VALUE) {
expected_type = PTR_TO_STACK;
@@ -770,6 +773,8 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
expected_type = CONST_IMM;
} else if (arg_type == ARG_CONST_MAP_PTR) {
expected_type = CONST_PTR_TO_MAP;
+ } else if (arg_type == ARG_PTR_TO_CTX) {
+ expected_type = PTR_TO_CTX;
} else {
verbose("unsupported arg_type %d\n", arg_type);
return -EFAULT;
@@ -852,7 +857,7 @@ static int check_call(struct verifier_env *env, int func_id)
}
/* eBPF programs must be GPL compatible to use GPL-ed functions */
- if (!env->prog->aux->is_gpl_compatible && fn->gpl_only) {
+ if (!env->prog->gpl_compatible && fn->gpl_only) {
verbose("cannot call GPL only function from proprietary program\n");
return -EINVAL;
}
@@ -1172,6 +1177,18 @@ static int check_ld_imm(struct verifier_env *env, struct bpf_insn *insn)
return 0;
}
+static bool may_access_skb(enum bpf_prog_type type)
+{
+ switch (type) {
+ case BPF_PROG_TYPE_SOCKET_FILTER:
+ case BPF_PROG_TYPE_SCHED_CLS:
+ case BPF_PROG_TYPE_SCHED_ACT:
+ return true;
+ default:
+ return false;
+ }
+}
+
/* verify safety of LD_ABS|LD_IND instructions:
* - they can only appear in the programs where ctx == skb
* - since they are wrappers of function calls, they scratch R1-R5 registers,
@@ -1194,8 +1211,8 @@ static int check_ld_abs(struct verifier_env *env, struct bpf_insn *insn)
struct reg_state *reg;
int i, err;
- if (env->prog->aux->prog_type != BPF_PROG_TYPE_SOCKET_FILTER) {
- verbose("BPF_LD_ABS|IND instructions are only allowed in socket filters\n");
+ if (!may_access_skb(env->prog->type)) {
+ verbose("BPF_LD_ABS|IND instructions not allowed for this program type\n");
return -EINVAL;
}
@@ -1606,11 +1623,10 @@ static int do_check(struct verifier_env *env)
return err;
} else if (class == BPF_LDX) {
- if (BPF_MODE(insn->code) != BPF_MEM ||
- insn->imm != 0) {
- verbose("BPF_LDX uses reserved fields\n");
- return -EINVAL;
- }
+ enum bpf_reg_type src_reg_type;
+
+ /* check for reserved fields is already done */
+
/* check src operand */
err = check_reg_arg(regs, insn->src_reg, SRC_OP);
if (err)
@@ -1629,6 +1645,29 @@ static int do_check(struct verifier_env *env)
if (err)
return err;
+ src_reg_type = regs[insn->src_reg].type;
+
+ if (insn->imm == 0 && BPF_SIZE(insn->code) == BPF_W) {
+ /* saw a valid insn
+ * dst_reg = *(u32 *)(src_reg + off)
+ * use reserved 'imm' field to mark this insn
+ */
+ insn->imm = src_reg_type;
+
+ } else if (src_reg_type != insn->imm &&
+ (src_reg_type == PTR_TO_CTX ||
+ insn->imm == PTR_TO_CTX)) {
+ /* ABuser program is trying to use the same insn
+ * dst_reg = *(u32*) (src_reg + off)
+ * with different pointer types:
+ * src_reg == ctx in one branch and
+ * src_reg == stack|map in some other branch.
+ * Reject it.
+ */
+ verbose("same insn cannot be used with different pointers\n");
+ return -EINVAL;
+ }
+
} else if (class == BPF_STX) {
if (BPF_MODE(insn->code) == BPF_XADD) {
err = check_xadd(env, insn);
@@ -1776,6 +1815,13 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
int i, j;
for (i = 0; i < insn_cnt; i++, insn++) {
+ if (BPF_CLASS(insn->code) == BPF_LDX &&
+ (BPF_MODE(insn->code) != BPF_MEM ||
+ insn->imm != 0)) {
+ verbose("BPF_LDX uses reserved fields\n");
+ return -EINVAL;
+ }
+
if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
struct bpf_map *map;
struct fd f;
@@ -1867,6 +1913,92 @@ static void convert_pseudo_ld_imm64(struct verifier_env *env)
insn->src_reg = 0;
}
+static void adjust_branches(struct bpf_prog *prog, int pos, int delta)
+{
+ struct bpf_insn *insn = prog->insnsi;
+ int insn_cnt = prog->len;
+ int i;
+
+ for (i = 0; i < insn_cnt; i++, insn++) {
+ if (BPF_CLASS(insn->code) != BPF_JMP ||
+ BPF_OP(insn->code) == BPF_CALL ||
+ BPF_OP(insn->code) == BPF_EXIT)
+ continue;
+
+ /* adjust offset of jmps if necessary */
+ if (i < pos && i + insn->off + 1 > pos)
+ insn->off += delta;
+ else if (i > pos && i + insn->off + 1 < pos)
+ insn->off -= delta;
+ }
+}
+
+/* convert load instructions that access fields of 'struct __sk_buff'
+ * into sequence of instructions that access fields of 'struct sk_buff'
+ */
+static int convert_ctx_accesses(struct verifier_env *env)
+{
+ struct bpf_insn *insn = env->prog->insnsi;
+ int insn_cnt = env->prog->len;
+ struct bpf_insn insn_buf[16];
+ struct bpf_prog *new_prog;
+ u32 cnt;
+ int i;
+
+ if (!env->prog->aux->ops->convert_ctx_access)
+ return 0;
+
+ for (i = 0; i < insn_cnt; i++, insn++) {
+ if (insn->code != (BPF_LDX | BPF_MEM | BPF_W))
+ continue;
+
+ if (insn->imm != PTR_TO_CTX) {
+ /* clear internal mark */
+ insn->imm = 0;
+ continue;
+ }
+
+ cnt = env->prog->aux->ops->
+ convert_ctx_access(insn->dst_reg, insn->src_reg,
+ insn->off, insn_buf);
+ if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
+ verbose("bpf verifier is misconfigured\n");
+ return -EINVAL;
+ }
+
+ if (cnt == 1) {
+ memcpy(insn, insn_buf, sizeof(*insn));
+ continue;
+ }
+
+ /* several new insns need to be inserted. Make room for them */
+ insn_cnt += cnt - 1;
+ new_prog = bpf_prog_realloc(env->prog,
+ bpf_prog_size(insn_cnt),
+ GFP_USER);
+ if (!new_prog)
+ return -ENOMEM;
+
+ new_prog->len = insn_cnt;
+
+ memmove(new_prog->insnsi + i + cnt, new_prog->insns + i + 1,
+ sizeof(*insn) * (insn_cnt - i - cnt));
+
+ /* copy substitute insns in place of load instruction */
+ memcpy(new_prog->insnsi + i, insn_buf, sizeof(*insn) * cnt);
+
+ /* adjust branches in the whole program */
+ adjust_branches(new_prog, i, cnt - 1);
+
+ /* keep walking new program and skip insns we just inserted */
+ env->prog = new_prog;
+ insn = new_prog->insnsi + i + cnt - 1;
+ i += cnt - 1;
+ }
+
+ return 0;
+}
+
static void free_states(struct verifier_env *env)
{
struct verifier_state_list *sl, *sln;
@@ -1889,13 +2021,13 @@ static void free_states(struct verifier_env *env)
kfree(env->explored_states);
}
-int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
+int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
{
char __user *log_ubuf = NULL;
struct verifier_env *env;
int ret = -EINVAL;
- if (prog->len <= 0 || prog->len > BPF_MAXINSNS)
+ if ((*prog)->len <= 0 || (*prog)->len > BPF_MAXINSNS)
return -E2BIG;
/* 'struct verifier_env' can be global, but since it's not small,
@@ -1905,7 +2037,7 @@ int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
if (!env)
return -ENOMEM;
- env->prog = prog;
+ env->prog = *prog;
/* grab the mutex to protect few globals used by verifier */
mutex_lock(&bpf_verifier_lock);
@@ -1937,7 +2069,7 @@ int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
if (ret < 0)
goto skip_full_check;
- env->explored_states = kcalloc(prog->len,
+ env->explored_states = kcalloc(env->prog->len,
sizeof(struct verifier_state_list *),
GFP_USER);
ret = -ENOMEM;
@@ -1954,6 +2086,10 @@ skip_full_check:
while (pop_stack(env, NULL) >= 0);
free_states(env);
+ if (ret == 0)
+ /* program is valid, convert *(u32*)(ctx + off) accesses */
+ ret = convert_ctx_accesses(env);
+
if (log_level && log_len >= log_size - 1) {
BUG_ON(log_len >= log_size);
/* verifier log exceeded user supplied buffer */
@@ -1969,18 +2105,18 @@ skip_full_check:
if (ret == 0 && env->used_map_cnt) {
/* if program passed verifier, update used_maps in bpf_prog_info */
- prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
- sizeof(env->used_maps[0]),
- GFP_KERNEL);
+ env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
+ sizeof(env->used_maps[0]),
+ GFP_KERNEL);
- if (!prog->aux->used_maps) {
+ if (!env->prog->aux->used_maps) {
ret = -ENOMEM;
goto free_log_buf;
}
- memcpy(prog->aux->used_maps, env->used_maps,
+ memcpy(env->prog->aux->used_maps, env->used_maps,
sizeof(env->used_maps[0]) * env->used_map_cnt);
- prog->aux->used_map_cnt = env->used_map_cnt;
+ env->prog->aux->used_map_cnt = env->used_map_cnt;
/* program is valid. Convert pseudo bpf_ld_imm64 into generic
* bpf_ld_imm64 instructions
@@ -1992,11 +2128,12 @@ free_log_buf:
if (log_level)
vfree(log_buf);
free_env:
- if (!prog->aux->used_maps)
+ if (!env->prog->aux->used_maps)
/* if we didn't copy map pointers into bpf_prog_info, release
* them now. Otherwise free_bpf_prog_info() will release them.
*/
release_maps(env);
+ *prog = env->prog;
kfree(env);
mutex_unlock(&bpf_verifier_lock);
return ret;
diff --git a/kernel/capability.c b/kernel/capability.c
index 989f5bfc57dc..45432b54d5c6 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -35,6 +35,7 @@ static int __init file_caps_disable(char *str)
}
__setup("no_file_caps", file_caps_disable);
+#ifdef CONFIG_MULTIUSER
/*
* More recent versions of libcap are available from:
*
@@ -386,6 +387,24 @@ bool ns_capable(struct user_namespace *ns, int cap)
}
EXPORT_SYMBOL(ns_capable);
+
+/**
+ * capable - Determine if the current task has a superior capability in effect
+ * @cap: The capability to be tested for
+ *
+ * Return true if the current task has the given superior capability currently
+ * available for use, false if not.
+ *
+ * This sets PF_SUPERPRIV on the task if the capability is available on the
+ * assumption that it's about to be used.
+ */
+bool capable(int cap)
+{
+ return ns_capable(&init_user_ns, cap);
+}
+EXPORT_SYMBOL(capable);
+#endif /* CONFIG_MULTIUSER */
+
/**
* file_ns_capable - Determine if the file's opener had a capability in effect
* @file: The file we want to check
@@ -412,22 +431,6 @@ bool file_ns_capable(const struct file *file, struct user_namespace *ns,
EXPORT_SYMBOL(file_ns_capable);
/**
- * capable - Determine if the current task has a superior capability in effect
- * @cap: The capability to be tested for
- *
- * Return true if the current task has the given superior capability currently
- * available for use, false if not.
- *
- * This sets PF_SUPERPRIV on the task if the capability is available on the
- * assumption that it's about to be used.
- */
-bool capable(int cap)
-{
- return ns_capable(&init_user_ns, cap);
-}
-EXPORT_SYMBOL(capable);
-
-/**
* capable_wrt_inode_uidgid - Check nsown_capable and uid and gid mapped
* @inode: The inode in question
* @cap: The capability in question
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a220fdb66568..469dd547770c 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4196,7 +4196,9 @@ static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
static int cgroup_pidlist_show(struct seq_file *s, void *v)
{
- return seq_printf(s, "%d\n", *(int *)v);
+ seq_printf(s, "%d\n", *(int *)v);
+
+ return 0;
}
static u64 cgroup_read_notify_on_release(struct cgroup_subsys_state *css,
@@ -5451,7 +5453,7 @@ struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss)
{
WARN_ON_ONCE(!rcu_read_lock_held());
- return idr_find(&ss->css_idr, id);
+ return id > 0 ? idr_find(&ss->css_idr, id) : NULL;
}
#ifdef CONFIG_CGROUP_DEBUG
diff --git a/kernel/cred.c b/kernel/cred.c
index e0573a43c7df..ec1c07667ec1 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -29,6 +29,9 @@
static struct kmem_cache *cred_jar;
+/* init to 2 - one for init_task, one to ensure it is never freed */
+struct group_info init_groups = { .usage = ATOMIC_INIT(2) };
+
/*
* The initial credentials for the initial task
*/
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 06917d537302..81aa3a4ece9f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6729,7 +6729,7 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
if (IS_ERR(prog))
return PTR_ERR(prog);
- if (prog->aux->prog_type != BPF_PROG_TYPE_KPROBE) {
+ if (prog->type != BPF_PROG_TYPE_KPROBE) {
/* valid fd, but invalid bpf program type */
bpf_prog_put(prog);
return -EINVAL;
diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c
index 83d4382f5699..6873bb3e6b7e 100644
--- a/kernel/exec_domain.c
+++ b/kernel/exec_domain.c
@@ -20,145 +20,10 @@
#include <linux/types.h>
#include <linux/fs_struct.h>
-
-static void default_handler(int, struct pt_regs *);
-
-static struct exec_domain *exec_domains = &default_exec_domain;
-static DEFINE_RWLOCK(exec_domains_lock);
-
-
-static unsigned long ident_map[32] = {
- 0, 1, 2, 3, 4, 5, 6, 7,
- 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20, 21, 22, 23,
- 24, 25, 26, 27, 28, 29, 30, 31
-};
-
-struct exec_domain default_exec_domain = {
- .name = "Linux", /* name */
- .handler = default_handler, /* lcall7 causes a seg fault. */
- .pers_low = 0, /* PER_LINUX personality. */
- .pers_high = 0, /* PER_LINUX personality. */
- .signal_map = ident_map, /* Identity map signals. */
- .signal_invmap = ident_map, /* - both ways. */
-};
-
-
-static void
-default_handler(int segment, struct pt_regs *regp)
-{
- set_personality(0);
-
- if (current_thread_info()->exec_domain->handler != default_handler)
- current_thread_info()->exec_domain->handler(segment, regp);
- else
- send_sig(SIGSEGV, current, 1);
-}
-
-static struct exec_domain *
-lookup_exec_domain(unsigned int personality)
-{
- unsigned int pers = personality(personality);
- struct exec_domain *ep;
-
- read_lock(&exec_domains_lock);
- for (ep = exec_domains; ep; ep = ep->next) {
- if (pers >= ep->pers_low && pers <= ep->pers_high)
- if (try_module_get(ep->module))
- goto out;
- }
-
-#ifdef CONFIG_MODULES
- read_unlock(&exec_domains_lock);
- request_module("personality-%d", pers);
- read_lock(&exec_domains_lock);
-
- for (ep = exec_domains; ep; ep = ep->next) {
- if (pers >= ep->pers_low && pers <= ep->pers_high)
- if (try_module_get(ep->module))
- goto out;
- }
-#endif
-
- ep = &default_exec_domain;
-out:
- read_unlock(&exec_domains_lock);
- return ep;
-}
-
-int
-register_exec_domain(struct exec_domain *ep)
-{
- struct exec_domain *tmp;
- int err = -EBUSY;
-
- if (ep == NULL)
- return -EINVAL;
-
- if (ep->next != NULL)
- return -EBUSY;
-
- write_lock(&exec_domains_lock);
- for (tmp = exec_domains; tmp; tmp = tmp->next) {
- if (tmp == ep)
- goto out;
- }
-
- ep->next = exec_domains;
- exec_domains = ep;
- err = 0;
-
-out:
- write_unlock(&exec_domains_lock);
- return err;
-}
-EXPORT_SYMBOL(register_exec_domain);
-
-int
-unregister_exec_domain(struct exec_domain *ep)
-{
- struct exec_domain **epp;
-
- epp = &exec_domains;
- write_lock(&exec_domains_lock);
- for (epp = &exec_domains; *epp; epp = &(*epp)->next) {
- if (ep == *epp)
- goto unregister;
- }
- write_unlock(&exec_domains_lock);
- return -EINVAL;
-
-unregister:
- *epp = ep->next;
- ep->next = NULL;
- write_unlock(&exec_domains_lock);
- return 0;
-}
-EXPORT_SYMBOL(unregister_exec_domain);
-
-int __set_personality(unsigned int personality)
-{
- struct exec_domain *oep = current_thread_info()->exec_domain;
-
- current_thread_info()->exec_domain = lookup_exec_domain(personality);
- current->personality = personality;
- module_put(oep->module);
-
- return 0;
-}
-EXPORT_SYMBOL(__set_personality);
-
#ifdef CONFIG_PROC_FS
static int execdomains_proc_show(struct seq_file *m, void *v)
{
- struct exec_domain *ep;
-
- read_lock(&exec_domains_lock);
- for (ep = exec_domains; ep; ep = ep->next)
- seq_printf(m, "%d-%d\t%-16s\t[%s]\n",
- ep->pers_low, ep->pers_high, ep->name,
- module_name(ep->module));
- read_unlock(&exec_domains_lock);
+ seq_puts(m, "0-0\tLinux \t[kernel]\n");
return 0;
}
diff --git a/kernel/exit.c b/kernel/exit.c
index feff10bbb307..22fcc05dec40 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -756,8 +756,6 @@ void do_exit(long code)
cgroup_exit(tsk);
- module_put(task_thread_info(tsk)->exec_domain->module);
-
/*
* FIXME: do that only when needed, using sched_exit tracepoint
*/
diff --git a/kernel/fork.c b/kernel/fork.c
index cf65139615a0..03c1eaaa6ef5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -74,6 +74,7 @@
#include <linux/uprobes.h>
#include <linux/aio.h>
#include <linux/compiler.h>
+#include <linux/sysctl.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -88,6 +89,16 @@
#include <trace/events/task.h>
/*
+ * Minimum number of threads to boot the kernel
+ */
+#define MIN_THREADS 20
+
+/*
+ * Maximum number of threads
+ */
+#define MAX_THREADS FUTEX_TID_MASK
+
+/*
* Protected counters by write_lock_irq(&tasklist_lock)
*/
unsigned long total_forks; /* Handle normal Linux uptimes. */
@@ -253,7 +264,30 @@ EXPORT_SYMBOL_GPL(__put_task_struct);
void __init __weak arch_task_cache_init(void) { }
-void __init fork_init(unsigned long mempages)
+/*
+ * set_max_threads
+ */
+static void set_max_threads(unsigned int max_threads_suggested)
+{
+ u64 threads;
+
+ /*
+ * The number of threads shall be limited such that the thread
+ * structures may only consume a small part of the available memory.
+ */
+ if (fls64(totalram_pages) + fls64(PAGE_SIZE) > 64)
+ threads = MAX_THREADS;
+ else
+ threads = div64_u64((u64) totalram_pages * (u64) PAGE_SIZE,
+ (u64) THREAD_SIZE * 8UL);
+
+ if (threads > max_threads_suggested)
+ threads = max_threads_suggested;
+
+ max_threads = clamp_t(u64, threads, MIN_THREADS, MAX_THREADS);
+}
+
+void __init fork_init(void)
{
#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
#ifndef ARCH_MIN_TASKALIGN
@@ -268,18 +302,7 @@ void __init fork_init(unsigned long mempages)
/* do the arch specific task caches init */
arch_task_cache_init();
- /*
- * The default maximum number of threads is set to a safe
- * value: the thread structures can take up at most half
- * of memory.
- */
- max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE);
-
- /*
- * we need to allow at least 20 threads to boot a system
- */
- if (max_threads < 20)
- max_threads = 20;
+ set_max_threads(MAX_THREADS);
init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
@@ -380,6 +403,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
*/
down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
+ /* No ordering required: file already has been exposed. */
+ RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
+
mm->total_vm = oldmm->total_vm;
mm->shared_vm = oldmm->shared_vm;
mm->exec_vm = oldmm->exec_vm;
@@ -505,7 +531,13 @@ static inline void mm_free_pgd(struct mm_struct *mm)
pgd_free(mm, mm->pgd);
}
#else
-#define dup_mmap(mm, oldmm) (0)
+static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
+{
+ down_write(&oldmm->mmap_sem);
+ RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
+ up_write(&oldmm->mmap_sem);
+ return 0;
+}
#define mm_alloc_pgd(mm) (0)
#define mm_free_pgd(mm)
#endif /* CONFIG_MMU */
@@ -674,34 +706,53 @@ void mmput(struct mm_struct *mm)
}
EXPORT_SYMBOL_GPL(mmput);
+/**
+ * set_mm_exe_file - change a reference to the mm's executable file
+ *
+ * This changes mm's executable file (shown as symlink /proc/[pid]/exe).
+ *
+ * Main users are mmput() and sys_execve(). Callers prevent concurrent
+ * invocations: in mmput() nobody alive left, in execve task is single
+ * threaded. sys_prctl(PR_SET_MM_MAP/EXE_FILE) also needs to set the
+ * mm->exe_file, but does so without using set_mm_exe_file() in order
+ * to do avoid the need for any locks.
+ */
void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
{
+ struct file *old_exe_file;
+
+ /*
+ * It is safe to dereference the exe_file without RCU as
+ * this function is only called if nobody else can access
+ * this mm -- see comment above for justification.
+ */
+ old_exe_file = rcu_dereference_raw(mm->exe_file);
+
if (new_exe_file)
get_file(new_exe_file);
- if (mm->exe_file)
- fput(mm->exe_file);
- mm->exe_file = new_exe_file;
+ rcu_assign_pointer(mm->exe_file, new_exe_file);
+ if (old_exe_file)
+ fput(old_exe_file);
}
+/**
+ * get_mm_exe_file - acquire a reference to the mm's executable file
+ *
+ * Returns %NULL if mm has no associated executable file.
+ * User must release file via fput().
+ */
struct file *get_mm_exe_file(struct mm_struct *mm)
{
struct file *exe_file;
- /* We need mmap_sem to protect against races with removal of exe_file */
- down_read(&mm->mmap_sem);
- exe_file = mm->exe_file;
- if (exe_file)
- get_file(exe_file);
- up_read(&mm->mmap_sem);
+ rcu_read_lock();
+ exe_file = rcu_dereference(mm->exe_file);
+ if (exe_file && !get_file_rcu(exe_file))
+ exe_file = NULL;
+ rcu_read_unlock();
return exe_file;
}
-
-static void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm)
-{
- /* It's safe to write the exe_file pointer without exe_file_lock because
- * this is called during fork when the task is not yet in /proc */
- newmm->exe_file = get_mm_exe_file(oldmm);
-}
+EXPORT_SYMBOL(get_mm_exe_file);
/**
* get_task_mm - acquire a reference to the task's mm
@@ -864,8 +915,6 @@ static struct mm_struct *dup_mm(struct task_struct *tsk)
if (!mm_init(mm, tsk))
goto fail_nomem;
- dup_mm_exe_file(oldmm, mm);
-
err = dup_mmap(mm, oldmm);
if (err)
goto free_pt;
@@ -1279,9 +1328,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
if (nr_threads >= max_threads)
goto bad_fork_cleanup_count;
- if (!try_module_get(task_thread_info(p)->exec_domain->module))
- goto bad_fork_cleanup_count;
-
delayacct_tsk_init(p); /* Must remain after dup_task_struct() */
p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);
p->flags |= PF_FORKNOEXEC;
@@ -1406,10 +1452,11 @@ static struct task_struct *copy_process(unsigned long clone_flags,
goto bad_fork_cleanup_io;
if (pid != &init_struct_pid) {
- retval = -ENOMEM;
pid = alloc_pid(p->nsproxy->pid_ns_for_children);
- if (!pid)
+ if (IS_ERR(pid)) {
+ retval = PTR_ERR(pid);
goto bad_fork_cleanup_io;
+ }
}
p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
@@ -1590,7 +1637,6 @@ bad_fork_cleanup_threadgroup_lock:
if (clone_flags & CLONE_THREAD)
threadgroup_change_end(current);
delayacct_tsk_free(p);
- module_put(task_thread_info(p)->exec_domain->module);
bad_fork_cleanup_count:
atomic_dec(&p->cred->user->processes);
exit_creds(p);
@@ -2004,3 +2050,26 @@ int unshare_files(struct files_struct **displaced)
task_unlock(task);
return 0;
}
+
+int sysctl_max_threads(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct ctl_table t;
+ int ret;
+ int threads = max_threads;
+ int min = MIN_THREADS;
+ int max = MAX_THREADS;
+
+ t = *table;
+ t.data = &threads;
+ t.extra1 = &min;
+ t.extra2 = &max;
+
+ ret = proc_dointvec_minmax(&t, write, buffer, lenp, ppos);
+ if (ret || !write)
+ return ret;
+
+ set_max_threads(threads);
+
+ return 0;
+}
diff --git a/kernel/gcov/base.c b/kernel/gcov/base.c
index b358a802fd18..a744098e4eb7 100644
--- a/kernel/gcov/base.c
+++ b/kernel/gcov/base.c
@@ -18,6 +18,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mutex.h>
+#include <linux/sched.h>
#include "gcov.h"
static int gcov_events_enabled;
@@ -107,8 +108,10 @@ void gcov_enable_events(void)
gcov_events_enabled = 1;
/* Perform event callback for previously registered entries. */
- while ((info = gcov_info_next(info)))
+ while ((info = gcov_info_next(info))) {
gcov_event(GCOV_ADD, info);
+ cond_resched();
+ }
mutex_unlock(&gcov_lock);
}
diff --git a/kernel/groups.c b/kernel/groups.c
index 664411f171b5..74d431d25251 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -9,9 +9,6 @@
#include <linux/user_namespace.h>
#include <asm/uaccess.h>
-/* init to 2 - one for init_task, one to ensure it is never freed */
-struct group_info init_groups = { .usage = ATOMIC_INIT(2) };
-
struct group_info *groups_alloc(int gidsetsize)
{
struct group_info *group_info;
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 06db12434d72..e0f90c2b57aa 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -169,7 +169,7 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
return;
rcu_read_lock();
- do_each_thread(g, t) {
+ for_each_process_thread(g, t) {
if (!max_count--)
goto unlock;
if (!--batch_count) {
@@ -180,7 +180,7 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
if (t->state == TASK_UNINTERRUPTIBLE)
check_hung_task(t, timeout);
- } while_each_thread(g, t);
+ }
unlock:
rcu_read_unlock();
}
diff --git a/kernel/pid.c b/kernel/pid.c
index cd36a5e0d173..4fd07d5b7baf 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -182,7 +182,7 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
spin_unlock_irq(&pidmap_lock);
kfree(page);
if (unlikely(!map->page))
- break;
+ return -ENOMEM;
}
if (likely(atomic_read(&map->nr_free))) {
for ( ; ; ) {
@@ -210,7 +210,7 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
}
pid = mk_pid(pid_ns, map, offset);
}
- return -1;
+ return -EAGAIN;
}
int next_pidmap(struct pid_namespace *pid_ns, unsigned int last)
@@ -301,17 +301,20 @@ struct pid *alloc_pid(struct pid_namespace *ns)
int i, nr;
struct pid_namespace *tmp;
struct upid *upid;
+ int retval = -ENOMEM;
pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL);
if (!pid)
- goto out;
+ return ERR_PTR(retval);
tmp = ns;
pid->level = ns->level;
for (i = ns->level; i >= 0; i--) {
nr = alloc_pidmap(tmp);
- if (nr < 0)
+ if (IS_ERR_VALUE(nr)) {
+ retval = nr;
goto out_free;
+ }
pid->numbers[i].nr = nr;
pid->numbers[i].ns = tmp;
@@ -339,7 +342,6 @@ struct pid *alloc_pid(struct pid_namespace *ns)
}
spin_unlock_irq(&pidmap_lock);
-out:
return pid;
out_unlock:
@@ -351,8 +353,7 @@ out_free:
free_pidmap(pid->numbers + i);
kmem_cache_free(ns->pid_cachep, pid);
- pid = NULL;
- goto out;
+ return ERR_PTR(retval);
}
void disable_pid_allocation(struct pid_namespace *ns)
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 227fec36b12a..c8e0e050a36a 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -456,8 +456,6 @@ static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p)
static int ptrace_detach(struct task_struct *child, unsigned int data)
{
- bool dead = false;
-
if (!valid_signal(data))
return -EIO;
@@ -467,18 +465,19 @@ static int ptrace_detach(struct task_struct *child, unsigned int data)
write_lock_irq(&tasklist_lock);
/*
- * This child can be already killed. Make sure de_thread() or
- * our sub-thread doing do_wait() didn't do release_task() yet.
+ * We rely on ptrace_freeze_traced(). It can't be killed and
+ * untraced by another thread, it can't be a zombie.
*/
- if (child->ptrace) {
- child->exit_code = data;
- dead = __ptrace_detach(current, child);
- }
+ WARN_ON(!child->ptrace || child->exit_state);
+ /*
+ * tasklist_lock avoids the race with wait_task_stopped(), see
+ * the comment in ptrace_resume().
+ */
+ child->exit_code = data;
+ __ptrace_detach(current, child);
write_unlock_irq(&tasklist_lock);
proc_ptrace_connector(child, PTRACE_DETACH);
- if (unlikely(dead))
- release_task(child);
return 0;
}
@@ -697,6 +696,8 @@ static int ptrace_peek_siginfo(struct task_struct *child,
static int ptrace_resume(struct task_struct *child, long request,
unsigned long data)
{
+ bool need_siglock;
+
if (!valid_signal(data))
return -EIO;
@@ -724,8 +725,26 @@ static int ptrace_resume(struct task_struct *child, long request,
user_disable_single_step(child);
}
+ /*
+ * Change ->exit_code and ->state under siglock to avoid the race
+ * with wait_task_stopped() in between; a non-zero ->exit_code will
+ * wrongly look like another report from tracee.
+ *
+ * Note that we need siglock even if ->exit_code == data and/or this
+ * status was not reported yet, the new status must not be cleared by
+ * wait_task_stopped() after resume.
+ *
+ * If data == 0 we do not care if wait_task_stopped() reports the old
+ * status and clears the code too; this can't race with the tracee, it
+ * takes siglock after resume.
+ */
+ need_siglock = data && !thread_group_empty(current);
+ if (need_siglock)
+ spin_lock_irq(&child->sighand->siglock);
child->exit_code = data;
wake_up_state(child, __TASK_TRACED);
+ if (need_siglock)
+ spin_unlock_irq(&child->sighand->siglock);
return 0;
}
diff --git a/kernel/reboot.c b/kernel/reboot.c
index 5925f5ae8dff..d20c85d9f8c0 100644
--- a/kernel/reboot.c
+++ b/kernel/reboot.c
@@ -387,8 +387,9 @@ void ctrl_alt_del(void)
}
char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff";
+static const char reboot_cmd[] = "/sbin/reboot";
-static int __orderly_poweroff(bool force)
+static int run_cmd(const char *cmd)
{
char **argv;
static char *envp[] = {
@@ -397,8 +398,7 @@ static int __orderly_poweroff(bool force)
NULL
};
int ret;
-
- argv = argv_split(GFP_KERNEL, poweroff_cmd, NULL);
+ argv = argv_split(GFP_KERNEL, cmd, NULL);
if (argv) {
ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
argv_free(argv);
@@ -406,8 +406,33 @@ static int __orderly_poweroff(bool force)
ret = -ENOMEM;
}
+ return ret;
+}
+
+static int __orderly_reboot(void)
+{
+ int ret;
+
+ ret = run_cmd(reboot_cmd);
+
+ if (ret) {
+ pr_warn("Failed to start orderly reboot: forcing the issue\n");
+ emergency_sync();
+ kernel_restart(NULL);
+ }
+
+ return ret;
+}
+
+static int __orderly_poweroff(bool force)
+{
+ int ret;
+
+ ret = run_cmd(poweroff_cmd);
+
if (ret && force) {
pr_warn("Failed to start orderly shutdown: forcing the issue\n");
+
/*
* I guess this should try to kick off some daemon to sync and
* poweroff asap. Or not even bother syncing if we're doing an
@@ -436,15 +461,33 @@ static DECLARE_WORK(poweroff_work, poweroff_work_func);
* This may be called from any context to trigger a system shutdown.
* If the orderly shutdown fails, it will force an immediate shutdown.
*/
-int orderly_poweroff(bool force)
+void orderly_poweroff(bool force)
{
if (force) /* do not override the pending "true" */
poweroff_force = true;
schedule_work(&poweroff_work);
- return 0;
}
EXPORT_SYMBOL_GPL(orderly_poweroff);
+static void reboot_work_func(struct work_struct *work)
+{
+ __orderly_reboot();
+}
+
+static DECLARE_WORK(reboot_work, reboot_work_func);
+
+/**
+ * orderly_reboot - Trigger an orderly system reboot
+ *
+ * This may be called from any context to trigger a system reboot.
+ * If the orderly reboot fails, it will force an immediate reboot.
+ */
+void orderly_reboot(void)
+{
+ schedule_work(&reboot_work);
+}
+EXPORT_SYMBOL_GPL(orderly_reboot);
+
static int __init reboot_setup(char *str)
{
for (;;) {
diff --git a/kernel/resource.c b/kernel/resource.c
index 19f2357dfda3..90552aab5f2d 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -1034,8 +1034,6 @@ resource_size_t resource_alignment(struct resource *res)
*
* request_region creates a new busy region.
*
- * check_region returns non-zero if the area is already busy.
- *
* release_region releases a matching busy region.
*/
@@ -1098,36 +1096,6 @@ struct resource * __request_region(struct resource *parent,
EXPORT_SYMBOL(__request_region);
/**
- * __check_region - check if a resource region is busy or free
- * @parent: parent resource descriptor
- * @start: resource start address
- * @n: resource region size
- *
- * Returns 0 if the region is free at the moment it is checked,
- * returns %-EBUSY if the region is busy.
- *
- * NOTE:
- * This function is deprecated because its use is racy.
- * Even if it returns 0, a subsequent call to request_region()
- * may fail because another driver etc. just allocated the region.
- * Do NOT use it. It will be removed from the kernel.
- */
-int __check_region(struct resource *parent, resource_size_t start,
- resource_size_t n)
-{
- struct resource * res;
-
- res = __request_region(parent, start, n, "check-region", 0);
- if (!res)
- return -EBUSY;
-
- release_resource(res);
- free_resource(res);
- return 0;
-}
-EXPORT_SYMBOL(__check_region);
-
-/**
* __release_region - release a previously reserved resource region
* @parent: parent resource descriptor
* @start: resource start address
diff --git a/kernel/signal.c b/kernel/signal.c
index a390499943e4..d51c5ddd855c 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2992,11 +2992,9 @@ static int do_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t *info)
* Nor can they impersonate a kill()/tgkill(), which adds source info.
*/
if ((info->si_code >= 0 || info->si_code == SI_TKILL) &&
- (task_pid_vnr(current) != pid)) {
- /* We used to allow any < 0 si_code */
- WARN_ON_ONCE(info->si_code < 0);
+ (task_pid_vnr(current) != pid))
return -EPERM;
- }
+
info->si_signo = sig;
/* POSIX.1b doesn't mention process groups. */
@@ -3041,12 +3039,10 @@ static int do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info)
/* Not even root can pretend to send signals from the kernel.
* Nor can they impersonate a kill()/tgkill(), which adds source info.
*/
- if (((info->si_code >= 0 || info->si_code == SI_TKILL)) &&
- (task_pid_vnr(current) != pid)) {
- /* We used to allow any < 0 si_code */
- WARN_ON_ONCE(info->si_code < 0);
+ if ((info->si_code >= 0 || info->si_code == SI_TKILL) &&
+ (task_pid_vnr(current) != pid))
return -EPERM;
- }
+
info->si_signo = sig;
return do_send_specific(tgid, pid, sig, info);
diff --git a/kernel/sys.c b/kernel/sys.c
index a03d9cd23ed7..a4e372b798a5 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -325,6 +325,7 @@ out_unlock:
* SMP: There are not races, the GIDs are checked only by filesystem
* operations (as far as semantic preservation is concerned).
*/
+#ifdef CONFIG_MULTIUSER
SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
{
struct user_namespace *ns = current_user_ns();
@@ -815,6 +816,7 @@ change_okay:
commit_creds(new);
return old_fsgid;
}
+#endif /* CONFIG_MULTIUSER */
/**
* sys_getpid - return the thread group id of the current process
@@ -1647,14 +1649,13 @@ SYSCALL_DEFINE1(umask, int, mask)
return mask;
}
-static int prctl_set_mm_exe_file_locked(struct mm_struct *mm, unsigned int fd)
+static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
{
struct fd exe;
+ struct file *old_exe, *exe_file;
struct inode *inode;
int err;
- VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm);
-
exe = fdget(fd);
if (!exe.file)
return -EBADF;
@@ -1678,15 +1679,22 @@ static int prctl_set_mm_exe_file_locked(struct mm_struct *mm, unsigned int fd)
/*
* Forbid mm->exe_file change if old file still mapped.
*/
+ exe_file = get_mm_exe_file(mm);
err = -EBUSY;
- if (mm->exe_file) {
+ if (exe_file) {
struct vm_area_struct *vma;
- for (vma = mm->mmap; vma; vma = vma->vm_next)
- if (vma->vm_file &&
- path_equal(&vma->vm_file->f_path,
- &mm->exe_file->f_path))
- goto exit;
+ down_read(&mm->mmap_sem);
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ if (!vma->vm_file)
+ continue;
+ if (path_equal(&vma->vm_file->f_path,
+ &exe_file->f_path))
+ goto exit_err;
+ }
+
+ up_read(&mm->mmap_sem);
+ fput(exe_file);
}
/*
@@ -1700,10 +1708,18 @@ static int prctl_set_mm_exe_file_locked(struct mm_struct *mm, unsigned int fd)
goto exit;
err = 0;
- set_mm_exe_file(mm, exe.file); /* this grabs a reference to exe.file */
+ /* set the new file, lockless */
+ get_file(exe.file);
+ old_exe = xchg(&mm->exe_file, exe.file);
+ if (old_exe)
+ fput(old_exe);
exit:
fdput(exe);
return err;
+exit_err:
+ up_read(&mm->mmap_sem);
+ fput(exe_file);
+ goto exit;
}
#ifdef CONFIG_CHECKPOINT_RESTORE
@@ -1838,10 +1854,9 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data
user_auxv[AT_VECTOR_SIZE - 1] = AT_NULL;
}
- down_write(&mm->mmap_sem);
if (prctl_map.exe_fd != (u32)-1)
- error = prctl_set_mm_exe_file_locked(mm, prctl_map.exe_fd);
- downgrade_write(&mm->mmap_sem);
+ error = prctl_set_mm_exe_file(mm, prctl_map.exe_fd);
+ down_read(&mm->mmap_sem);
if (error)
goto out;
@@ -1907,12 +1922,8 @@ static int prctl_set_mm(int opt, unsigned long addr,
if (!capable(CAP_SYS_RESOURCE))
return -EPERM;
- if (opt == PR_SET_MM_EXE_FILE) {
- down_write(&mm->mmap_sem);
- error = prctl_set_mm_exe_file_locked(mm, (unsigned int)addr);
- up_write(&mm->mmap_sem);
- return error;
- }
+ if (opt == PR_SET_MM_EXE_FILE)
+ return prctl_set_mm_exe_file(mm, (unsigned int)addr);
if (addr >= TASK_SIZE || addr < mmap_min_addr)
return -EINVAL;
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 5adcb0ae3a58..7995ef5868d8 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -159,6 +159,20 @@ cond_syscall(sys_uselib);
cond_syscall(sys_fadvise64);
cond_syscall(sys_fadvise64_64);
cond_syscall(sys_madvise);
+cond_syscall(sys_setuid);
+cond_syscall(sys_setregid);
+cond_syscall(sys_setgid);
+cond_syscall(sys_setreuid);
+cond_syscall(sys_setresuid);
+cond_syscall(sys_getresuid);
+cond_syscall(sys_setresgid);
+cond_syscall(sys_getresgid);
+cond_syscall(sys_setgroups);
+cond_syscall(sys_getgroups);
+cond_syscall(sys_setfsuid);
+cond_syscall(sys_setfsgid);
+cond_syscall(sys_capget);
+cond_syscall(sys_capset);
/* arch-specific weak syscall entries */
cond_syscall(sys_pciconfig_read);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8c0eabd41886..2082b1a88fb9 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -93,11 +93,9 @@
#include <linux/nmi.h>
#endif
-
#if defined(CONFIG_SYSCTL)
/* External variables not in a header file. */
-extern int max_threads;
extern int suid_dumpable;
#ifdef CONFIG_COREDUMP
extern int core_uses_pid;
@@ -710,10 +708,10 @@ static struct ctl_table kern_table[] = {
#endif
{
.procname = "threads-max",
- .data = &max_threads,
+ .data = NULL,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = sysctl_max_threads,
},
{
.procname = "random",
@@ -1335,6 +1333,15 @@ static struct ctl_table vm_table[] = {
.extra1 = &min_extfrag_threshold,
.extra2 = &max_extfrag_threshold,
},
+ {
+ .procname = "compact_unevictable_allowed",
+ .data = &sysctl_compact_unevictable_allowed,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
#endif /* CONFIG_COMPACTION */
{
@@ -1974,7 +1981,15 @@ static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
int write, void *data)
{
if (write) {
- *valp = *negp ? -*lvalp : *lvalp;
+ if (*negp) {
+ if (*lvalp > (unsigned long) INT_MAX + 1)
+ return -EINVAL;
+ *valp = -*lvalp;
+ } else {
+ if (*lvalp > (unsigned long) INT_MAX)
+ return -EINVAL;
+ *valp = *lvalp;
+ }
} else {
int val = *valp;
if (val < 0) {
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index c3e4fcfddd45..3f34496244e9 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -327,11 +327,11 @@ static void t_stop(struct seq_file *m, void *p)
local_irq_enable();
}
-static int trace_lookup_stack(struct seq_file *m, long i)
+static void trace_lookup_stack(struct seq_file *m, long i)
{
unsigned long addr = stack_dump_trace[i];
- return seq_printf(m, "%pS\n", (void *)addr);
+ seq_printf(m, "%pS\n", (void *)addr);
}
static void print_disabled(struct seq_file *m)