Diffstat (limited to 'kernel')
 kernel/Makefile            |   6
 kernel/audit.c             |  96
 kernel/audit.h             |   1
 kernel/auditsc.c           |   5
 kernel/cpuset.c            |   6
 kernel/exit.c              |  31
 kernel/fork.c              |   9
 kernel/futex.c             | 138
 kernel/hrtimer.c           |   2
 kernel/irq/spurious.c      |  12
 kernel/kallsyms.c          |  11
 kernel/kfifo.c             |   3
 kernel/kthread.c           |   2
 kernel/module.c            |  62
 kernel/nsproxy.c           |  72
 kernel/params.c            |   1
 kernel/pid.c               |   2
 kernel/printk.c            |  55
 kernel/ptrace.c            |   7
 kernel/relay.c             |   6
 kernel/rtmutex-debug.c     |   6
 kernel/rtmutex.c           |   6
 kernel/rtmutex_common.h    |   9
 kernel/sched.c             |  55
 kernel/sched_debug.c       |   2
 kernel/seccomp.c           |  29
 kernel/signal.c            |  33
 kernel/softirq.c           |   6
 kernel/stop_machine.c      |   8
 kernel/sys.c               |  13
 kernel/sys_ni.c            |   1
 kernel/sysctl.c            |  43
 kernel/taskstats.c         |   4
 kernel/time.c              |  32
 kernel/time/clockevents.c  |  41
 kernel/time/ntp.c          |   2
 kernel/time/timekeeping.c  |  37
 kernel/time/timer_stats.c  |  14
 kernel/timer.c             |  15
 kernel/user.c              |  18
 kernel/user_namespace.c    |  87
 kernel/utsname.c           |  12
 kernel/utsname_sysctl.c    |   5
 kernel/workqueue.c         |  45
 44 files changed, 726 insertions(+), 324 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile
index 642d4277c2ea..2a999836ca18 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -4,11 +4,12 @@
obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
exit.o itimer.o time.o softirq.o resource.o \
- sysctl.o capability.o ptrace.o timer.o user.o \
+ sysctl.o capability.o ptrace.o timer.o user.o user_namespace.o \
signal.o sys.o kmod.o workqueue.o pid.o \
rcupdate.o extable.o params.o posix-timers.o \
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
- hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o
+ hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o \
+ utsname.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += time/
@@ -48,7 +49,6 @@ obj-$(CONFIG_SECCOMP) += seccomp.o
obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
obj-$(CONFIG_RELAY) += relay.o
obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
-obj-$(CONFIG_UTS_NS) += utsname.o
obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
diff --git a/kernel/audit.c b/kernel/audit.c
index d13276d41410..5ce8851facf7 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -58,6 +58,7 @@
#include <linux/selinux.h>
#include <linux/inotify.h>
#include <linux/freezer.h>
+#include <linux/tty.h>
#include "audit.h"
@@ -423,6 +424,31 @@ static int kauditd_thread(void *dummy)
return 0;
}
+static int audit_prepare_user_tty(pid_t pid, uid_t loginuid)
+{
+ struct task_struct *tsk;
+ int err;
+
+ read_lock(&tasklist_lock);
+ tsk = find_task_by_pid(pid);
+ err = -ESRCH;
+ if (!tsk)
+ goto out;
+ err = 0;
+
+ spin_lock_irq(&tsk->sighand->siglock);
+ if (!tsk->signal->audit_tty)
+ err = -EPERM;
+ spin_unlock_irq(&tsk->sighand->siglock);
+ if (err)
+ goto out;
+
+ tty_audit_push_task(tsk, loginuid);
+out:
+ read_unlock(&tasklist_lock);
+ return err;
+}
+
int audit_send_list(void *_dest)
{
struct audit_netlink_list *dest = _dest;
@@ -511,6 +537,8 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
case AUDIT_DEL:
case AUDIT_DEL_RULE:
case AUDIT_SIGNAL_INFO:
+ case AUDIT_TTY_GET:
+ case AUDIT_TTY_SET:
if (security_netlink_recv(skb, CAP_AUDIT_CONTROL))
err = -EPERM;
break;
@@ -622,6 +650,11 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
err = audit_filter_user(&NETLINK_CB(skb), msg_type);
if (err == 1) {
err = 0;
+ if (msg_type == AUDIT_USER_TTY) {
+ err = audit_prepare_user_tty(pid, loginuid);
+ if (err)
+ break;
+ }
ab = audit_log_start(NULL, GFP_KERNEL, msg_type);
if (ab) {
audit_log_format(ab,
@@ -638,8 +671,17 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
" subj=%s", ctx);
kfree(ctx);
}
- audit_log_format(ab, " msg='%.1024s'",
- (char *)data);
+ if (msg_type != AUDIT_USER_TTY)
+ audit_log_format(ab, " msg='%.1024s'",
+ (char *)data);
+ else {
+ int size;
+
+ audit_log_format(ab, " msg=");
+ size = nlmsg_len(nlh);
+ audit_log_n_untrustedstring(ab, size,
+ data);
+ }
audit_set_pid(ab, pid);
audit_log_end(ab);
}
@@ -730,6 +772,45 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
0, 0, sig_data, sizeof(*sig_data) + len);
kfree(sig_data);
break;
+ case AUDIT_TTY_GET: {
+ struct audit_tty_status s;
+ struct task_struct *tsk;
+
+ read_lock(&tasklist_lock);
+ tsk = find_task_by_pid(pid);
+ if (!tsk)
+ err = -ESRCH;
+ else {
+ spin_lock_irq(&tsk->sighand->siglock);
+ s.enabled = tsk->signal->audit_tty != 0;
+ spin_unlock_irq(&tsk->sighand->siglock);
+ }
+ read_unlock(&tasklist_lock);
+ audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_TTY_GET, 0, 0,
+ &s, sizeof(s));
+ break;
+ }
+ case AUDIT_TTY_SET: {
+ struct audit_tty_status *s;
+ struct task_struct *tsk;
+
+ if (nlh->nlmsg_len < sizeof(struct audit_tty_status))
+ return -EINVAL;
+ s = data;
+ if (s->enabled != 0 && s->enabled != 1)
+ return -EINVAL;
+ read_lock(&tasklist_lock);
+ tsk = find_task_by_pid(pid);
+ if (!tsk)
+ err = -ESRCH;
+ else {
+ spin_lock_irq(&tsk->sighand->siglock);
+ tsk->signal->audit_tty = s->enabled != 0;
+ spin_unlock_irq(&tsk->sighand->siglock);
+ }
+ read_unlock(&tasklist_lock);
+ break;
+ }
default:
err = -EINVAL;
break;
@@ -1185,7 +1266,7 @@ static void audit_log_n_string(struct audit_buffer *ab, size_t slen,
}
/**
- * audit_log_n_unstrustedstring - log a string that may contain random characters
+ * audit_log_n_untrustedstring - log a string that may contain random characters
* @ab: audit_buffer
* @len: length of string (not including trailing null)
* @string: string to be logged
@@ -1201,25 +1282,24 @@ static void audit_log_n_string(struct audit_buffer *ab, size_t slen,
const char *audit_log_n_untrustedstring(struct audit_buffer *ab, size_t len,
const char *string)
{
- const unsigned char *p = string;
+ const unsigned char *p;
- while (*p) {
+ for (p = string; p < (const unsigned char *)string + len && *p; p++) {
if (*p == '"' || *p < 0x21 || *p > 0x7f) {
audit_log_hex(ab, string, len);
return string + len + 1;
}
- p++;
}
audit_log_n_string(ab, len, string);
return p + 1;
}
/**
- * audit_log_unstrustedstring - log a string that may contain random characters
+ * audit_log_untrustedstring - log a string that may contain random characters
* @ab: audit_buffer
* @string: string to be logged
*
- * Same as audit_log_n_unstrustedstring(), except that strlen is used to
+ * Same as audit_log_n_untrustedstring(), except that strlen is used to
* determine string length.
*/
const char *audit_log_untrustedstring(struct audit_buffer *ab, const char *string)
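
The new AUDIT_USER_TTY handling feeds keystroke data through audit_log_n_untrustedstring(), which now honours an explicit length and, as before, falls back to hex encoding when the data contains a quote or a non-printable byte. A user-space sketch of that escaping rule (log_untrusted is a hypothetical name, not a kernel symbol):

    #include <stdio.h>

    /* Mirror of the rule above: a quote or any byte outside
     * 0x21..0x7f forces hex encoding of the whole buffer; otherwise
     * the string is logged as-is in quotes. */
    static void log_untrusted(const unsigned char *s, size_t len)
    {
        size_t i;

        for (i = 0; i < len && s[i]; i++) {
            if (s[i] == '"' || s[i] < 0x21 || s[i] > 0x7f) {
                for (i = 0; i < len; i++)
                    printf("%02X", s[i]);   /* hex fallback */
                printf("\n");
                return;
            }
        }
        printf("\"%.*s\"\n", (int)len, s);  /* printable, quote it */
    }
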
diff --git a/kernel/audit.h b/kernel/audit.h
index 815d6f5c04ee..95877435c347 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -115,7 +115,6 @@ extern struct sk_buff * audit_make_reply(int pid, int seq, int type,
extern void audit_send_reply(int pid, int seq, int type,
int done, int multi,
void *payload, int size);
-extern void audit_log_lost(const char *message);
extern void audit_panic(const char *message);
struct audit_netlink_list {
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index e36481ed61b4..b7640a5f382a 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -71,9 +71,6 @@
extern struct list_head audit_filter_list[];
-/* No syscall auditing will take place unless audit_enabled != 0. */
-extern int audit_enabled;
-
/* AUDIT_NAMES is the number of slots we reserve in the audit_context
* for saving names from getname(). */
#define AUDIT_NAMES 20
@@ -2040,7 +2037,7 @@ int __audit_signal_info(int sig, struct task_struct *t)
/**
* audit_core_dumps - record information about processes that end abnormally
- * @sig: signal value
+ * @signr: signal value
*
* If a process ends with a core dump, something fishy is going on and we
* should record the event for investigation.
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 4c49188cc49b..824b1c01f410 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -981,10 +981,10 @@ static int update_nodemask(struct cpuset *cs, char *buf)
mmarray = kmalloc(ntasks * sizeof(*mmarray), GFP_KERNEL);
if (!mmarray)
goto done;
- write_lock_irq(&tasklist_lock); /* block fork */
+ read_lock(&tasklist_lock); /* block fork */
if (atomic_read(&cs->count) <= ntasks)
break; /* got enough */
- write_unlock_irq(&tasklist_lock); /* try again */
+ read_unlock(&tasklist_lock); /* try again */
kfree(mmarray);
}
@@ -1006,7 +1006,7 @@ static int update_nodemask(struct cpuset *cs, char *buf)
continue;
mmarray[n++] = mm;
} while_each_thread(g, p);
- write_unlock_irq(&tasklist_lock);
+ read_unlock(&tasklist_lock);
/*
* Now that we've dropped the tasklist spinlock, we can
diff --git a/kernel/exit.c b/kernel/exit.c
index ca6a11b73023..57626692cd90 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -858,6 +858,34 @@ static void exit_notify(struct task_struct *tsk)
release_task(tsk);
}
+#ifdef CONFIG_DEBUG_STACK_USAGE
+static void check_stack_usage(void)
+{
+ static DEFINE_SPINLOCK(low_water_lock);
+ static int lowest_to_date = THREAD_SIZE;
+ unsigned long *n = end_of_stack(current);
+ unsigned long free;
+
+ while (*n == 0)
+ n++;
+ free = (unsigned long)n - (unsigned long)end_of_stack(current);
+
+ if (free >= lowest_to_date)
+ return;
+
+ spin_lock(&low_water_lock);
+ if (free < lowest_to_date) {
+ printk(KERN_WARNING "%s used greatest stack depth: %lu bytes "
+ "left\n",
+ current->comm, free);
+ lowest_to_date = free;
+ }
+ spin_unlock(&low_water_lock);
+}
+#else
+static inline void check_stack_usage(void) {}
+#endif
+
fastcall NORET_TYPE void do_exit(long code)
{
struct task_struct *tsk = current;
@@ -937,6 +965,8 @@ fastcall NORET_TYPE void do_exit(long code)
if (unlikely(tsk->compat_robust_list))
compat_exit_robust_list(tsk);
#endif
+ if (group_dead)
+ tty_audit_exit();
if (unlikely(tsk->audit_context))
audit_free(tsk);
@@ -949,6 +979,7 @@ fastcall NORET_TYPE void do_exit(long code)
exit_sem(tsk);
__exit_files(tsk);
__exit_fs(tsk);
+ check_stack_usage();
exit_thread();
cpuset_exit(tsk);
exit_keys(tsk);
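
check_stack_usage() depends on the thread stack being zero-filled at creation: it scans from the far end for the first clobbered word and reports the low-water mark. The same sentinel idea in a freestanding user-space sketch (region_free is a hypothetical name):

    #include <stdio.h>

    #define STACK_WORDS 1024

    /* Zero-sentinel watermark: count untouched zero words from the
     * far end of a region that is consumed from the other end. */
    static unsigned long region_free(const unsigned long *base, size_t words)
    {
        size_t n = 0;

        while (n < words && base[n] == 0)
            n++;
        return n * sizeof(unsigned long);
    }

    int main(void)
    {
        static unsigned long region[STACK_WORDS]; /* zeroed, like a fresh stack */

        region[STACK_WORDS - 1] = 0xdeadbeef;     /* deepest "use" so far */
        printf("%lu bytes left\n", region_free(region, STACK_WORDS));
        return 0;
    }
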
diff --git a/kernel/fork.c b/kernel/fork.c
index da3a155bba0d..7c5c5888e00a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -49,6 +49,7 @@
#include <linux/delayacct.h>
#include <linux/taskstats_kern.h>
#include <linux/random.h>
+#include <linux/tty.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -897,6 +898,8 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
}
acct_init_pacct(&sig->pacct);
+ tty_audit_fork(sig);
+
return 0;
}
@@ -999,7 +1002,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
if (atomic_read(&p->user->processes) >=
p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
- p->user != &root_user)
+ p->user != current->nsproxy->user_ns->root_user)
goto bad_fork_free;
}
@@ -1059,6 +1062,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->lock_depth = -1; /* -1 = no lock */
do_posix_clock_monotonic_gettime(&p->start_time);
+ p->real_start_time = p->start_time;
+ monotonic_to_bootbased(&p->real_start_time);
p->security = NULL;
p->io_context = NULL;
p->io_wait = NULL;
@@ -1601,7 +1606,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
err = -EINVAL;
if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
- CLONE_NEWUTS|CLONE_NEWIPC))
+ CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER))
goto bad_unshare_out;
if ((err = unshare_thread(unshare_flags)))
diff --git a/kernel/futex.c b/kernel/futex.c
index 45490bec5831..5c3f45d07c53 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -121,6 +121,24 @@ static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
static struct vfsmount *futex_mnt;
/*
+ * Take mm->mmap_sem, when futex is shared
+ */
+static inline void futex_lock_mm(struct rw_semaphore *fshared)
+{
+ if (fshared)
+ down_read(fshared);
+}
+
+/*
+ * Release mm->mmap_sem, when the futex is shared
+ */
+static inline void futex_unlock_mm(struct rw_semaphore *fshared)
+{
+ if (fshared)
+ up_read(fshared);
+}
+
+/*
* We hash on the keys returned from get_futex_key (see below).
*/
static struct futex_hash_bucket *hash_futex(union futex_key *key)
@@ -287,7 +305,18 @@ void drop_futex_key_refs(union futex_key *key)
}
EXPORT_SYMBOL_GPL(drop_futex_key_refs);
-static inline int get_futex_value_locked(u32 *dest, u32 __user *from)
+static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval)
+{
+ u32 curval;
+
+ pagefault_disable();
+ curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
+ pagefault_enable();
+
+ return curval;
+}
+
+static int get_futex_value_locked(u32 *dest, u32 __user *from)
{
int ret;
@@ -620,9 +649,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
newval = FUTEX_WAITERS | new_owner->pid;
- pagefault_disable();
- curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
- pagefault_enable();
+ curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
if (curval == -EFAULT)
ret = -EFAULT;
@@ -659,9 +686,7 @@ static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
* There is no waiter, so we unlock the futex. The owner died
* bit has not to be preserved here. We are the owner:
*/
- pagefault_disable();
- oldval = futex_atomic_cmpxchg_inatomic(uaddr, uval, 0);
- pagefault_enable();
+ oldval = cmpxchg_futex_value_locked(uaddr, uval, 0);
if (oldval == -EFAULT)
return oldval;
@@ -700,8 +725,7 @@ static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
union futex_key key;
int ret;
- if (fshared)
- down_read(fshared);
+ futex_lock_mm(fshared);
ret = get_futex_key(uaddr, fshared, &key);
if (unlikely(ret != 0))
@@ -725,8 +749,7 @@ static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
spin_unlock(&hb->lock);
out:
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
return ret;
}
@@ -746,8 +769,7 @@ futex_wake_op(u32 __user *uaddr1, struct rw_semaphore *fshared,
int ret, op_ret, attempt = 0;
retryfull:
- if (fshared)
- down_read(fshared);
+ futex_lock_mm(fshared);
ret = get_futex_key(uaddr1, fshared, &key1);
if (unlikely(ret != 0))
@@ -793,7 +815,7 @@ retry:
*/
if (attempt++) {
ret = futex_handle_fault((unsigned long)uaddr2,
- fshared, attempt);
+ fshared, attempt);
if (ret)
goto out;
goto retry;
@@ -803,8 +825,7 @@ retry:
* If we would have faulted, release mmap_sem,
* fault it in and start all over again.
*/
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
ret = get_user(dummy, uaddr2);
if (ret)
@@ -841,8 +862,8 @@ retry:
if (hb1 != hb2)
spin_unlock(&hb2->lock);
out:
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
+
return ret;
}
@@ -861,8 +882,7 @@ static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared,
int ret, drop_count = 0;
retry:
- if (fshared)
- down_read(fshared);
+ futex_lock_mm(fshared);
ret = get_futex_key(uaddr1, fshared, &key1);
if (unlikely(ret != 0))
@@ -890,8 +910,7 @@ static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared,
* If we would have faulted, release mmap_sem, fault
* it in and start all over again.
*/
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
ret = get_user(curval, uaddr1);
@@ -944,8 +963,7 @@ out_unlock:
drop_futex_key_refs(&key1);
out:
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
return ret;
}
@@ -1113,10 +1131,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
while (!ret) {
newval = (uval & FUTEX_OWNER_DIED) | newtid;
- pagefault_disable();
- curval = futex_atomic_cmpxchg_inatomic(uaddr,
- uval, newval);
- pagefault_enable();
+ curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
if (curval == -EFAULT)
ret = -EFAULT;
@@ -1134,6 +1149,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
#define ARG3_SHARED 1
static long futex_wait_restart(struct restart_block *restart);
+
static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
u32 val, ktime_t *abs_time)
{
@@ -1148,8 +1164,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
q.pi_state = NULL;
retry:
- if (fshared)
- down_read(fshared);
+ futex_lock_mm(fshared);
ret = get_futex_key(uaddr, fshared, &q.key);
if (unlikely(ret != 0))
@@ -1186,8 +1201,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
* If we would have faulted, release mmap_sem, fault it in and
* start all over again.
*/
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
ret = get_user(uval, uaddr);
@@ -1206,8 +1220,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
* Now the futex is queued and we have checked the data, we
* don't want to hold mmap_sem while we sleep.
*/
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
/*
* There might have been scheduling since the queue_me(), as we
@@ -1285,8 +1298,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
queue_unlock(&q, hb);
out_release_sem:
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
return ret;
}
@@ -1333,8 +1345,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
q.pi_state = NULL;
retry:
- if (fshared)
- down_read(fshared);
+ futex_lock_mm(fshared);
ret = get_futex_key(uaddr, fshared, &q.key);
if (unlikely(ret != 0))
@@ -1353,9 +1364,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
*/
newval = current->pid;
- pagefault_disable();
- curval = futex_atomic_cmpxchg_inatomic(uaddr, 0, newval);
- pagefault_enable();
+ curval = cmpxchg_futex_value_locked(uaddr, 0, newval);
if (unlikely(curval == -EFAULT))
goto uaddr_faulted;
@@ -1398,9 +1407,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
lock_taken = 1;
}
- pagefault_disable();
- curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
- pagefault_enable();
+ curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
if (unlikely(curval == -EFAULT))
goto uaddr_faulted;
@@ -1428,8 +1435,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
* exit to complete.
*/
queue_unlock(&q, hb);
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
cond_resched();
goto retry;
@@ -1465,8 +1471,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
* Now the futex is queued and we have checked the data, we
* don't want to hold mmap_sem while we sleep.
*/
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
WARN_ON(!q.pi_state);
/*
@@ -1480,8 +1485,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
ret = ret ? 0 : -EWOULDBLOCK;
}
- if (fshared)
- down_read(fshared);
+ futex_lock_mm(fshared);
spin_lock(q.lock_ptr);
if (!ret) {
@@ -1518,8 +1522,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
/* Unqueue and drop the lock */
unqueue_me_pi(&q);
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
return ret != -EINTR ? ret : -ERESTARTNOINTR;
@@ -1527,8 +1530,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
queue_unlock(&q, hb);
out_release_sem:
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
return ret;
uaddr_faulted:
@@ -1550,8 +1552,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
goto retry_unlocked;
}
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
ret = get_user(uval, uaddr);
if (!ret && (uval != -EFAULT))
@@ -1585,8 +1586,7 @@ retry:
/*
* First take all the futex related locks:
*/
- if (fshared)
- down_read(fshared);
+ futex_lock_mm(fshared);
ret = get_futex_key(uaddr, fshared, &key);
if (unlikely(ret != 0))
@@ -1601,11 +1601,9 @@ retry_unlocked:
* again. If it succeeds then we can return without waking
* anyone else up:
*/
- if (!(uval & FUTEX_OWNER_DIED)) {
- pagefault_disable();
- uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
- pagefault_enable();
- }
+ if (!(uval & FUTEX_OWNER_DIED))
+ uval = cmpxchg_futex_value_locked(uaddr, current->pid, 0);
+
if (unlikely(uval == -EFAULT))
goto pi_faulted;
@@ -1647,8 +1645,7 @@ retry_unlocked:
out_unlock:
spin_unlock(&hb->lock);
out:
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
return ret;
@@ -1671,8 +1668,7 @@ pi_faulted:
goto retry_unlocked;
}
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
ret = get_user(uval, uaddr);
if (!ret && (uval != -EFAULT))
@@ -1729,8 +1725,8 @@ static int futex_fd(u32 __user *uaddr, int signal)
if (printk_timed_ratelimit(&printk_interval, 60 * 60 * 1000)) {
printk(KERN_WARNING "Process `%s' used FUTEX_FD, which "
- "will be removed from the kernel in June 2007\n",
- current->comm);
+ "will be removed from the kernel in June 2007\n",
+ current->comm);
}
ret = -EINVAL;
@@ -1908,10 +1904,8 @@ retry:
* Wake robust non-PI futexes here. The wakeup of
* PI futexes happens in exit_pi_state():
*/
- if (!pi) {
- if (uval & FUTEX_WAITERS)
+ if (!pi && (uval & FUTEX_WAITERS))
futex_wake(uaddr, &curr->mm->mmap_sem, 1);
- }
}
return 0;
}
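
Most of the churn above folds the repeated "if (fshared) down_read(fshared)" pairs into futex_lock_mm()/futex_unlock_mm(). The pattern, a lock wrapper that degrades to a no-op for private futexes, translates directly to user space; a sketch with POSIX rwlocks (shared_lock/shared_unlock are hypothetical names):

    #include <pthread.h>

    /* The futex_lock_mm()/futex_unlock_mm() shape: callers pass NULL
     * for a private object and a real lock for a shared one, and the
     * wrapper quietly does the right thing. */
    static inline void shared_lock(pthread_rwlock_t *l)
    {
        if (l)
            pthread_rwlock_rdlock(l);
    }

    static inline void shared_unlock(pthread_rwlock_t *l)
    {
        if (l)
            pthread_rwlock_unlock(l);
    }
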
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 23c03f43e196..72d034258ba1 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1406,7 +1406,7 @@ static void migrate_hrtimers(int cpu)
static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
- long cpu = (long)hcpu;
+ unsigned int cpu = (long)hcpu;
switch (action) {
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index bd9e272d55e9..32b161972fad 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -172,7 +172,17 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc,
irqreturn_t action_ret)
{
if (unlikely(action_ret != IRQ_HANDLED)) {
- desc->irqs_unhandled++;
+ /*
+ * If we are seeing only the odd spurious IRQ caused by
+ * bus asynchronicity then don't eventually trigger an error,
+		 * otherwise the counter becomes a doomsday timer for otherwise
+ * working systems
+ */
+ if (jiffies - desc->last_unhandled > HZ/10)
+ desc->irqs_unhandled = 1;
+ else
+ desc->irqs_unhandled++;
+ desc->last_unhandled = jiffies;
if (unlikely(action_ret != IRQ_NONE))
report_bad_irq(irq, desc, action_ret);
}
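
The note_interrupt() change turns irqs_unhandled into a decaying counter: a spurious interrupt only accumulates toward the shutdown threshold if the previous one arrived less than HZ/10 (100 ms) earlier. A user-space sketch of the same decay rule, using CLOCK_MONOTONIC in place of jiffies (note_unhandled is a hypothetical name):

    #include <stdio.h>
    #include <time.h>

    static unsigned long unhandled;
    static struct timespec last;

    /* Decaying error counter: bursts of failures accumulate, but an
     * isolated failure resets the count to 1 instead of inching an
     * otherwise healthy system toward the shutdown limit. */
    static void note_unhandled(void)
    {
        struct timespec now;
        long long gap_ns;

        clock_gettime(CLOCK_MONOTONIC, &now);
        gap_ns = (long long)(now.tv_sec - last.tv_sec) * 1000000000LL
                 + (now.tv_nsec - last.tv_nsec);
        if (gap_ns > 100000000LL)       /* > 100 ms since last one */
            unhandled = 1;
        else
            unhandled++;
        last = now;
        if (unhandled > 99900)          /* note_interrupt()'s limit */
            printf("too many unhandled events, disabling\n");
    }
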
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index fed54418626c..0d662475dd9f 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -317,13 +317,12 @@ int sprint_symbol(char *buffer, unsigned long address)
name = kallsyms_lookup(address, &size, &offset, &modname, namebuf);
if (!name)
return sprintf(buffer, "0x%lx", address);
- else {
- if (modname)
- return sprintf(buffer, "%s+%#lx/%#lx [%s]", name, offset,
+
+ if (modname)
+ return sprintf(buffer, "%s+%#lx/%#lx [%s]", name, offset,
size, modname);
- else
- return sprintf(buffer, "%s+%#lx/%#lx", name, offset, size);
- }
+ else
+ return sprintf(buffer, "%s+%#lx/%#lx", name, offset, size);
}
/* Look up a kernel symbol and print it to the kernel messages. */
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index cee419143fd4..bc41ad0f24f8 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -24,6 +24,7 @@
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/kfifo.h>
+#include <linux/log2.h>
/**
* kfifo_init - allocates a new FIFO using a preallocated buffer
@@ -41,7 +42,7 @@ struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size,
struct kfifo *fifo;
/* size must be a power of 2 */
- BUG_ON(size & (size - 1));
+ BUG_ON(!is_power_of_2(size));
fifo = kmalloc(sizeof(struct kfifo), gfp_mask);
if (!fifo)
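
is_power_of_2() also tightens the check: the old "size & (size - 1)" expression is zero for size == 0, so a zero size slipped past the BUG_ON. The underlying test, as a standalone sketch:

    #include <stdbool.h>

    /* The test behind is_power_of_2(): a power of two has a single
     * bit set, so clearing the lowest set bit leaves zero. The
     * n != 0 guard rejects 0, which 'size & (size - 1)' accepted. */
    static inline bool is_pow2(unsigned long n)
    {
        return n != 0 && (n & (n - 1)) == 0;
    }
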
diff --git a/kernel/kthread.c b/kernel/kthread.c
index bbd51b81a3e8..a404f7ee7395 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -215,7 +215,7 @@ int kthread_stop(struct task_struct *k)
EXPORT_SYMBOL(kthread_stop);
-static __init void kthreadd_setup(void)
+static noinline __init_refok void kthreadd_setup(void)
{
struct task_struct *tsk = current;
diff --git a/kernel/module.c b/kernel/module.c
index 9bd93de01f4a..539fed9ac83c 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -61,10 +61,8 @@ extern int module_sysfs_initialized;
/* If this is set, the section belongs in the init part of the module */
#define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1))
-/* Protects module list */
-static DEFINE_SPINLOCK(modlist_lock);
-
-/* List of modules, protected by module_mutex AND modlist_lock */
+/* List of modules, protected by module_mutex or preempt_disable
+ * (add/delete uses stop_machine). */
static DEFINE_MUTEX(module_mutex);
static LIST_HEAD(modules);
@@ -488,8 +486,7 @@ static void free_modinfo_##field(struct module *mod) \
mod->field = NULL; \
} \
static struct module_attribute modinfo_##field = { \
- .attr = { .name = __stringify(field), .mode = 0444, \
- .owner = THIS_MODULE }, \
+ .attr = { .name = __stringify(field), .mode = 0444 }, \
.show = show_modinfo_##field, \
.setup = setup_modinfo_##field, \
.test = modinfo_##field##_exists, \
@@ -761,14 +758,13 @@ static void print_unload_info(struct seq_file *m, struct module *mod)
void __symbol_put(const char *symbol)
{
struct module *owner;
- unsigned long flags;
const unsigned long *crc;
- spin_lock_irqsave(&modlist_lock, flags);
+ preempt_disable();
if (!__find_symbol(symbol, &owner, &crc, 1))
BUG();
module_put(owner);
- spin_unlock_irqrestore(&modlist_lock, flags);
+ preempt_enable();
}
EXPORT_SYMBOL(__symbol_put);
@@ -793,7 +789,7 @@ static ssize_t show_refcnt(struct module_attribute *mattr,
}
static struct module_attribute refcnt = {
- .attr = { .name = "refcnt", .mode = 0444, .owner = THIS_MODULE },
+ .attr = { .name = "refcnt", .mode = 0444 },
.show = show_refcnt,
};
@@ -851,7 +847,7 @@ static ssize_t show_initstate(struct module_attribute *mattr,
}
static struct module_attribute initstate = {
- .attr = { .name = "initstate", .mode = 0444, .owner = THIS_MODULE },
+ .attr = { .name = "initstate", .mode = 0444 },
.show = show_initstate,
};
@@ -1032,7 +1028,6 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect,
sattr->mattr.show = module_sect_show;
sattr->mattr.store = NULL;
sattr->mattr.attr.name = sattr->name;
- sattr->mattr.attr.owner = mod;
sattr->mattr.attr.mode = S_IRUGO;
*(gattr++) = &(sattr++)->mattr.attr;
}
@@ -1090,7 +1085,6 @@ int module_add_modinfo_attrs(struct module *mod)
if (!attr->test ||
(attr->test && attr->test(mod))) {
memcpy(temp_attr, attr, sizeof(*temp_attr));
- temp_attr->attr.owner = mod;
error = sysfs_create_file(&mod->mkobj.kobj,&temp_attr->attr);
++temp_attr;
}
@@ -1231,14 +1225,14 @@ static void free_module(struct module *mod)
void *__symbol_get(const char *symbol)
{
struct module *owner;
- unsigned long value, flags;
+ unsigned long value;
const unsigned long *crc;
- spin_lock_irqsave(&modlist_lock, flags);
+ preempt_disable();
value = __find_symbol(symbol, &owner, &crc, 1);
if (value && !strong_try_module_get(owner))
value = 0;
- spin_unlock_irqrestore(&modlist_lock, flags);
+ preempt_enable();
return (void *)value;
}
@@ -2235,26 +2229,13 @@ unsigned long module_kallsyms_lookup_name(const char *name)
/* Called by the /proc file system to return a list of modules. */
static void *m_start(struct seq_file *m, loff_t *pos)
{
- struct list_head *i;
- loff_t n = 0;
-
mutex_lock(&module_mutex);
- list_for_each(i, &modules) {
- if (n++ == *pos)
- break;
- }
- if (i == &modules)
- return NULL;
- return i;
+ return seq_list_start(&modules, *pos);
}
static void *m_next(struct seq_file *m, void *p, loff_t *pos)
{
- struct list_head *i = p;
- (*pos)++;
- if (i->next == &modules)
- return NULL;
- return i->next;
+ return seq_list_next(p, &modules, pos);
}
static void m_stop(struct seq_file *m, void *p)
@@ -2324,11 +2305,10 @@ const struct seq_operations modules_op = {
/* Given an address, look for it in the module exception tables. */
const struct exception_table_entry *search_module_extables(unsigned long addr)
{
- unsigned long flags;
const struct exception_table_entry *e = NULL;
struct module *mod;
- spin_lock_irqsave(&modlist_lock, flags);
+ preempt_disable();
list_for_each_entry(mod, &modules, list) {
if (mod->num_exentries == 0)
continue;
@@ -2339,7 +2319,7 @@ const struct exception_table_entry *search_module_extables(unsigned long addr)
if (e)
break;
}
- spin_unlock_irqrestore(&modlist_lock, flags);
+ preempt_enable();
/* Now, if we found one, we are running inside it now, hence
we cannot unload the module, hence no refcnt needed. */
@@ -2351,25 +2331,24 @@ const struct exception_table_entry *search_module_extables(unsigned long addr)
*/
int is_module_address(unsigned long addr)
{
- unsigned long flags;
struct module *mod;
- spin_lock_irqsave(&modlist_lock, flags);
+ preempt_disable();
list_for_each_entry(mod, &modules, list) {
if (within(addr, mod->module_core, mod->core_size)) {
- spin_unlock_irqrestore(&modlist_lock, flags);
+ preempt_enable();
return 1;
}
}
- spin_unlock_irqrestore(&modlist_lock, flags);
+ preempt_enable();
return 0;
}
-/* Is this a valid kernel address? We don't grab the lock: we are oopsing. */
+/* Is this a valid kernel address? */
struct module *__module_text_address(unsigned long addr)
{
struct module *mod;
@@ -2384,11 +2363,10 @@ struct module *__module_text_address(unsigned long addr)
struct module *module_text_address(unsigned long addr)
{
struct module *mod;
- unsigned long flags;
- spin_lock_irqsave(&modlist_lock, flags);
+ preempt_disable();
mod = __module_text_address(addr);
- spin_unlock_irqrestore(&modlist_lock, flags);
+ preempt_enable();
return mod;
}
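
The /proc/modules iterator now uses the generic seq_list helpers instead of the open-coded walk. Modeled on the removed loops, the helpers behave roughly like this sketch (standalone list type, loff_t simplified to long):

    #include <stddef.h>

    struct list_head {
        struct list_head *next, *prev;
    };

    /* Roughly what seq_list_start()/seq_list_next() do: position the
     * iterator on the pos'th entry, or return NULL past the end. */
    static struct list_head *seq_start(struct list_head *head, long pos)
    {
        struct list_head *i;

        for (i = head->next; i != head; i = i->next)
            if (pos-- == 0)
                return i;
        return NULL;
    }

    static struct list_head *seq_next(struct list_head *p,
                                      struct list_head *head, long *pos)
    {
        ++*pos;
        return p->next == head ? NULL : p->next;
    }
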
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 9e83b589f754..10f0bbba382b 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -21,6 +21,8 @@
#include <linux/utsname.h>
#include <linux/pid_namespace.h>
+static struct kmem_cache *nsproxy_cachep;
+
struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
static inline void get_nsproxy(struct nsproxy *ns)
@@ -43,9 +45,11 @@ static inline struct nsproxy *clone_nsproxy(struct nsproxy *orig)
{
struct nsproxy *ns;
- ns = kmemdup(orig, sizeof(struct nsproxy), GFP_KERNEL);
- if (ns)
+ ns = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL);
+ if (ns) {
+ memcpy(ns, orig, sizeof(struct nsproxy));
atomic_set(&ns->count, 1);
+ }
return ns;
}
@@ -54,33 +58,51 @@ static inline struct nsproxy *clone_nsproxy(struct nsproxy *orig)
* Return the newly created nsproxy. Do not attach this to the task,
* leave it to the caller to do proper locking and attach it to task.
*/
-static struct nsproxy *create_new_namespaces(int flags, struct task_struct *tsk,
- struct fs_struct *new_fs)
+static struct nsproxy *create_new_namespaces(unsigned long flags,
+ struct task_struct *tsk, struct fs_struct *new_fs)
{
struct nsproxy *new_nsp;
+ int err;
new_nsp = clone_nsproxy(tsk->nsproxy);
if (!new_nsp)
return ERR_PTR(-ENOMEM);
new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, new_fs);
- if (IS_ERR(new_nsp->mnt_ns))
+ if (IS_ERR(new_nsp->mnt_ns)) {
+ err = PTR_ERR(new_nsp->mnt_ns);
goto out_ns;
+ }
new_nsp->uts_ns = copy_utsname(flags, tsk->nsproxy->uts_ns);
- if (IS_ERR(new_nsp->uts_ns))
+ if (IS_ERR(new_nsp->uts_ns)) {
+ err = PTR_ERR(new_nsp->uts_ns);
goto out_uts;
+ }
new_nsp->ipc_ns = copy_ipcs(flags, tsk->nsproxy->ipc_ns);
- if (IS_ERR(new_nsp->ipc_ns))
+ if (IS_ERR(new_nsp->ipc_ns)) {
+ err = PTR_ERR(new_nsp->ipc_ns);
goto out_ipc;
+ }
new_nsp->pid_ns = copy_pid_ns(flags, tsk->nsproxy->pid_ns);
- if (IS_ERR(new_nsp->pid_ns))
+ if (IS_ERR(new_nsp->pid_ns)) {
+ err = PTR_ERR(new_nsp->pid_ns);
goto out_pid;
+ }
+
+ new_nsp->user_ns = copy_user_ns(flags, tsk->nsproxy->user_ns);
+ if (IS_ERR(new_nsp->user_ns)) {
+ err = PTR_ERR(new_nsp->user_ns);
+ goto out_user;
+ }
return new_nsp;
+out_user:
+ if (new_nsp->pid_ns)
+ put_pid_ns(new_nsp->pid_ns);
out_pid:
if (new_nsp->ipc_ns)
put_ipc_ns(new_nsp->ipc_ns);
@@ -91,15 +113,15 @@ out_uts:
if (new_nsp->mnt_ns)
put_mnt_ns(new_nsp->mnt_ns);
out_ns:
- kfree(new_nsp);
- return ERR_PTR(-ENOMEM);
+ kmem_cache_free(nsproxy_cachep, new_nsp);
+ return ERR_PTR(err);
}
/*
* called from clone. This now handles copy for nsproxy and all
* namespaces therein.
*/
-int copy_namespaces(int flags, struct task_struct *tsk)
+int copy_namespaces(unsigned long flags, struct task_struct *tsk)
{
struct nsproxy *old_ns = tsk->nsproxy;
struct nsproxy *new_ns;
@@ -110,7 +132,7 @@ int copy_namespaces(int flags, struct task_struct *tsk)
get_nsproxy(old_ns);
- if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC)))
+ if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER)))
return 0;
if (!capable(CAP_SYS_ADMIN)) {
@@ -140,7 +162,9 @@ void free_nsproxy(struct nsproxy *ns)
put_ipc_ns(ns->ipc_ns);
if (ns->pid_ns)
put_pid_ns(ns->pid_ns);
- kfree(ns);
+ if (ns->user_ns)
+ put_user_ns(ns->user_ns);
+ kmem_cache_free(nsproxy_cachep, ns);
}
/*
@@ -152,19 +176,10 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
{
int err = 0;
- if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC)))
+ if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
+ CLONE_NEWUSER)))
return 0;
-#ifndef CONFIG_IPC_NS
- if (unshare_flags & CLONE_NEWIPC)
- return -EINVAL;
-#endif
-
-#ifndef CONFIG_UTS_NS
- if (unshare_flags & CLONE_NEWUTS)
- return -EINVAL;
-#endif
-
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -174,3 +189,12 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
err = PTR_ERR(*new_nsp);
return err;
}
+
+static int __init nsproxy_cache_init(void)
+{
+ nsproxy_cachep = kmem_cache_create("nsproxy", sizeof(struct nsproxy),
+ 0, SLAB_PANIC, NULL, NULL);
+ return 0;
+}
+
+module_init(nsproxy_cache_init);
diff --git a/kernel/params.c b/kernel/params.c
index e61c46c97ce7..effbaaedd7f3 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -491,7 +491,6 @@ param_sysfs_setup(struct module_kobject *mk,
pattr->mattr.show = param_attr_show;
pattr->mattr.store = param_attr_store;
pattr->mattr.attr.name = (char *)&kp->name[name_skip];
- pattr->mattr.attr.owner = mk->mod;
pattr->mattr.attr.mode = kp->perm;
*(gattr++) = &(pattr++)->mattr.attr;
}
diff --git a/kernel/pid.c b/kernel/pid.c
index eb66bd2953ab..c6e3f9ffff87 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -365,7 +365,7 @@ struct pid *find_ge_pid(int nr)
}
EXPORT_SYMBOL_GPL(find_get_pid);
-struct pid_namespace *copy_pid_ns(int flags, struct pid_namespace *old_ns)
+struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns)
{
BUG_ON(!old_ns);
get_pid_ns(old_ns);
diff --git a/kernel/printk.c b/kernel/printk.c
index 0bbdeac2810c..051d27e36a6c 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -449,13 +449,16 @@ static int printk_time = 1;
#else
static int printk_time = 0;
#endif
-module_param(printk_time, int, S_IRUGO | S_IWUSR);
+module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
static int __init printk_time_setup(char *str)
{
if (*str)
return 0;
printk_time = 1;
+ printk(KERN_NOTICE "The 'time' option is deprecated and "
+ "is scheduled for removal in early 2008\n");
+ printk(KERN_NOTICE "Use 'printk.time=<value>' instead\n");
return 1;
}
@@ -483,6 +486,9 @@ static int have_callable_console(void)
* @fmt: format string
*
* This is printk(). It can be called from any context. We want it to work.
+ * Be aware of the fact that if oops_in_progress is not set, we might try to
+ * wake klogd up which could deadlock on runqueue lock if printk() is called
+ * from scheduler code.
*
* We try to grab the console_sem. If we succeed, it's easy - we log the output and
* call the console drivers. If we fail to get the semaphore we place the output
@@ -654,7 +660,7 @@ static void call_console_drivers(unsigned long start, unsigned long end)
*/
static int __init console_setup(char *str)
{
- char name[sizeof(console_cmdline[0].name)];
+ char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for index */
char *s, *options;
int idx;
@@ -662,27 +668,27 @@ static int __init console_setup(char *str)
* Decode str into name, index, options.
*/
if (str[0] >= '0' && str[0] <= '9') {
- strcpy(name, "ttyS");
- strncpy(name + 4, str, sizeof(name) - 5);
+ strcpy(buf, "ttyS");
+ strncpy(buf + 4, str, sizeof(buf) - 5);
} else {
- strncpy(name, str, sizeof(name) - 1);
+ strncpy(buf, str, sizeof(buf) - 1);
}
- name[sizeof(name) - 1] = 0;
+ buf[sizeof(buf) - 1] = 0;
if ((options = strchr(str, ',')) != NULL)
*(options++) = 0;
#ifdef __sparc__
if (!strcmp(str, "ttya"))
- strcpy(name, "ttyS0");
+ strcpy(buf, "ttyS0");
if (!strcmp(str, "ttyb"))
- strcpy(name, "ttyS1");
+ strcpy(buf, "ttyS1");
#endif
- for (s = name; *s; s++)
+ for (s = buf; *s; s++)
if ((*s >= '0' && *s <= '9') || *s == ',')
break;
idx = simple_strtoul(s, NULL, 10);
*s = 0;
- add_preferred_console(name, idx, options);
+ add_preferred_console(buf, idx, options);
return 1;
}
__setup("console=", console_setup);
@@ -709,7 +715,7 @@ int __init add_preferred_console(char *name, int idx, char *options)
* See if this tty is not yet registered, and
* if we have a slot free.
*/
- for(i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++)
+ for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++)
if (strcmp(console_cmdline[i].name, name) == 0 &&
console_cmdline[i].index == idx) {
selected_console = i;
@@ -726,6 +732,25 @@ int __init add_preferred_console(char *name, int idx, char *options)
return 0;
}
+int __init update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options)
+{
+ struct console_cmdline *c;
+ int i;
+
+ for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++)
+ if (strcmp(console_cmdline[i].name, name) == 0 &&
+ console_cmdline[i].index == idx) {
+ c = &console_cmdline[i];
+ memcpy(c->name, name_new, sizeof(c->name));
+ c->name[sizeof(c->name) - 1] = 0;
+ c->options = options;
+ c->index = idx_new;
+ return i;
+ }
+ /* not found */
+ return -1;
+}
+
#ifndef CONFIG_DISABLE_CONSOLE_SUSPEND
/**
* suspend_console - suspend the console subsystem
@@ -942,6 +967,9 @@ void register_console(struct console *console)
if (preferred_console < 0 || bootconsole || !console_drivers)
preferred_console = selected_console;
+ if (console->early_setup)
+ console->early_setup();
+
/*
* See if we want to use this console driver. If we
* didn't select a console we take the first one
@@ -985,12 +1013,15 @@ void register_console(struct console *console)
if (!(console->flags & CON_ENABLED))
return;
- if (bootconsole) {
+ if (bootconsole && (console->flags & CON_CONSDEV)) {
printk(KERN_INFO "console handover: boot [%s%d] -> real [%s%d]\n",
bootconsole->name, bootconsole->index,
console->name, console->index);
unregister_console(bootconsole);
console->flags &= ~CON_PRINTBUFFER;
+ } else {
+ printk(KERN_INFO "console [%s%d] enabled\n",
+ console->name, console->index);
}
/*
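
console_setup() splits a console= argument into a driver name, an index and an option string, so "console=ttyS0,115200n8" becomes name "ttyS", idx 0, options "115200n8", and a bare leading digit is shorthand for ttyS<n>. A user-space sketch of that split (parse_console is a hypothetical name):

    #include <ctype.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Sketch of the console= parsing above: a bare number means
     * ttyS<n>, the first digit ends the name, a comma starts the
     * options. */
    static void parse_console(char *str)
    {
        char buf[20], *s, *options;

        if (isdigit((unsigned char)str[0]))
            snprintf(buf, sizeof(buf), "ttyS%s", str);
        else
            snprintf(buf, sizeof(buf), "%s", str);
        options = strchr(buf, ',');
        if (options)
            *options++ = '\0';
        for (s = buf; *s && !isdigit((unsigned char)*s); s++)
            ;
        printf("name=%.*s idx=%ld options=%s\n",
               (int)(s - buf), buf, strtol(s, NULL, 10),
               options ? options : "");
    }

    int main(void)
    {
        char arg[] = "ttyS0,115200n8";

        parse_console(arg);    /* name=ttyS idx=0 options=115200n8 */
        return 0;
    }
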
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index ad7949a589dd..b1d11f1c7cf7 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -161,6 +161,7 @@ int ptrace_may_attach(struct task_struct *task)
int ptrace_attach(struct task_struct *task)
{
int retval;
+ unsigned long flags;
audit_ptrace(task);
@@ -181,9 +182,7 @@ repeat:
* cpu's that may have task_lock).
*/
task_lock(task);
- local_irq_disable();
- if (!write_trylock(&tasklist_lock)) {
- local_irq_enable();
+ if (!write_trylock_irqsave(&tasklist_lock, flags)) {
task_unlock(task);
do {
cpu_relax();
@@ -211,7 +210,7 @@ repeat:
force_sig_specific(SIGSTOP, task);
bad:
- write_unlock_irq(&tasklist_lock);
+ write_unlock_irqrestore(&tasklist_lock, flags);
task_unlock(task);
out:
return retval;
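
The open-coded local_irq_disable()/write_trylock() sequence is replaced by write_trylock_irqsave(). A sketch of what such a macro plausibly expands to, reconstructed from the code it replaces (an assumption, not copied from a header):

    /* Reconstructed expansion (GCC statement expression, kernel
     * style): take the lock with interrupts disabled, but restore
     * interrupts before returning failure so the caller can spin
     * and retry with interrupts enabled. */
    #define write_trylock_irqsave(lock, flags)              \
    ({                                                      \
            local_irq_save(flags);                          \
            write_trylock(lock) ?                           \
            1 : ({ local_irq_restore(flags); 0; });         \
    })
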
diff --git a/kernel/relay.c b/kernel/relay.c
index 3b299fb3855c..a615a8f513fc 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -1061,7 +1061,7 @@ static struct pipe_buf_operations relay_pipe_buf_ops = {
.get = generic_pipe_buf_get,
};
-/**
+/*
* subbuf_splice_actor - splice up to one subbuf's worth of data
*/
static int subbuf_splice_actor(struct file *in,
@@ -1074,7 +1074,9 @@ static int subbuf_splice_actor(struct file *in,
unsigned int pidx, poff, total_len, subbuf_pages, ret;
struct rchan_buf *rbuf = in->private_data;
unsigned int subbuf_size = rbuf->chan->subbuf_size;
- size_t read_start = ((size_t)*ppos) % rbuf->chan->alloc_size;
+ uint64_t pos = (uint64_t) *ppos;
+ uint32_t alloc_size = (uint32_t) rbuf->chan->alloc_size;
+ size_t read_start = (size_t) do_div(pos, alloc_size);
size_t read_subbuf = read_start / subbuf_size;
size_t padding = rbuf->padding[read_subbuf];
size_t nonpad_end = read_subbuf * subbuf_size + subbuf_size - padding;
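
The splice actor now reduces *ppos modulo the buffer size with do_div() instead of "%", since a 64-by-32 modulo on a 32-bit kernel would pull in a libgcc helper the kernel does not link against. do_div() divides its 64-bit argument in place and returns the 32-bit remainder; a user-space mimic of that contract:

    #include <stdint.h>
    #include <stdio.h>

    /* Mimic of the do_div() contract: divide the 64-bit dividend in
     * place by a 32-bit divisor and hand back the 32-bit remainder.
     * (Evaluates its arguments more than once; fine for a sketch.) */
    #define do_div(n, base) ({                      \
            uint32_t rem = (uint32_t)((n) % (base));\
            (n) /= (base);                          \
            rem;                                    \
    })

    int main(void)
    {
        uint64_t pos = (1ULL << 33) + 100;  /* file offset past 4 GiB */
        uint32_t rem = do_div(pos, 4096);

        /* prints "read_start 100, chunk 2097152" */
        printf("read_start %u, chunk %llu\n",
               rem, (unsigned long long)pos);
        return 0;
    }
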
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c
index da8d6bf46457..5aedbee014df 100644
--- a/kernel/rtmutex-debug.c
+++ b/kernel/rtmutex-debug.c
@@ -29,12 +29,6 @@
#include "rtmutex_common.h"
-#ifdef CONFIG_DEBUG_RT_MUTEXES
-# include "rtmutex-debug.h"
-#else
-# include "rtmutex.h"
-#endif
-
# define TRACE_WARN_ON(x) WARN_ON(x)
# define TRACE_BUG_ON(x) BUG_ON(x)
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 17d28ce20300..8cd9bd2cdb34 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -17,12 +17,6 @@
#include "rtmutex_common.h"
-#ifdef CONFIG_DEBUG_RT_MUTEXES
-# include "rtmutex-debug.h"
-#else
-# include "rtmutex.h"
-#endif
-
/*
* lock->owner state tracking:
*
diff --git a/kernel/rtmutex_common.h b/kernel/rtmutex_common.h
index 9c75856e791e..2d3b83593ca3 100644
--- a/kernel/rtmutex_common.h
+++ b/kernel/rtmutex_common.h
@@ -103,7 +103,7 @@ static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)
static inline struct task_struct *rt_mutex_real_owner(struct rt_mutex *lock)
{
- return (struct task_struct *)
+ return (struct task_struct *)
((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
}
@@ -120,4 +120,11 @@ extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
struct task_struct *proxy_owner);
extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,
struct task_struct *proxy_owner);
+
+#ifdef CONFIG_DEBUG_RT_MUTEXES
+# include "rtmutex-debug.h"
+#else
+# include "rtmutex.h"
+#endif
+
#endif
diff --git a/kernel/sched.c b/kernel/sched.c
index 9fbced64bfee..1c8076676eb1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -736,7 +736,9 @@ static void update_curr_load(struct rq *rq, u64 now)
*
* The "10% effect" is relative and cumulative: from _any_ nice level,
* if you go up 1 level, it's -10% CPU usage, if you go down 1 level
- * it's +10% CPU usage.
+ * it's +10% CPU usage. (to achieve that we use a multiplier of 1.25.
+ * If a task goes up by ~10% and another task goes down by ~10% then
+ * the relative distance between them is ~25%.)
*/
static const int prio_to_weight[40] = {
/* -20 */ 88818, 71054, 56843, 45475, 36380, 29104, 23283, 18626, 14901, 11921,
@@ -746,15 +748,22 @@ static const int prio_to_weight[40] = {
/* 10 */ 110, 87, 70, 56, 45, 36, 29, 23, 18, 15,
};
+/*
+ * Inverse (2^32/x) values of the prio_to_weight[] array, precalculated.
+ *
+ * In cases where the weight does not change often, we can use the
+ * precalculated inverse to speed up arithmetics by turning divisions
+ * into multiplications:
+ */
static const u32 prio_to_wmult[40] = {
- 48356, 60446, 75558, 94446, 118058, 147573,
- 184467, 230589, 288233, 360285, 450347,
- 562979, 703746, 879575, 1099582, 1374389,
- 717986, 2147483, 2684354, 3355443, 4194304,
- 244160, 6557201, 8196502, 10250518, 12782640,
- 16025997, 19976592, 24970740, 31350126, 39045157,
- 49367440, 61356675, 76695844, 95443717, 119304647,
- 148102320, 186737708, 238609294, 286331153,
+/* -20 */ 48356, 60446, 75558, 94446, 118058,
+/* -15 */ 147573, 184467, 230589, 288233, 360285,
+/* -10 */ 450347, 562979, 703746, 879575, 1099582,
+/* -5 */ 1374389, 1717986, 2147483, 2684354, 3355443,
+/* 0 */ 4194304, 5244160, 6557201, 8196502, 10250518,
+/* 5 */ 12782640, 16025997, 19976592, 24970740, 31350126,
+/* 10 */ 39045157, 49367440, 61356675, 76695844, 95443717,
+/* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
};
static inline void
@@ -4647,14 +4656,14 @@ static void show_task(struct task_struct *p)
state = p->state ? __ffs(p->state) + 1 : 0;
printk("%-13.13s %c", p->comm,
state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?');
-#if (BITS_PER_LONG == 32)
+#if BITS_PER_LONG == 32
if (state == TASK_RUNNING)
- printk(" running ");
+ printk(" running ");
else
- printk(" %08lX ", thread_saved_pc(p));
+ printk(" %08lx ", thread_saved_pc(p));
#else
if (state == TASK_RUNNING)
- printk(" running task ");
+ printk(" running task ");
else
printk(" %016lx ", thread_saved_pc(p));
#endif
@@ -4666,11 +4675,7 @@ static void show_task(struct task_struct *p)
free = (unsigned long)n - (unsigned long)end_of_stack(p);
}
#endif
- printk("%5lu %5d %6d", free, p->pid, p->parent->pid);
- if (!p->mm)
- printk(" (L-TLB)\n");
- else
- printk(" (NOTLB)\n");
+ printk("%5lu %5d %6d\n", free, p->pid, p->parent->pid);
if (state != TASK_RUNNING)
show_stack(p, NULL);
@@ -4680,14 +4685,12 @@ void show_state_filter(unsigned long state_filter)
{
struct task_struct *g, *p;
-#if (BITS_PER_LONG == 32)
- printk("\n"
- " free sibling\n");
- printk(" task PC stack pid father child younger older\n");
+#if BITS_PER_LONG == 32
+ printk(KERN_INFO
+ " task PC stack pid father\n");
#else
- printk("\n"
- " free sibling\n");
- printk(" task PC stack pid father child younger older\n");
+ printk(KERN_INFO
+ " task PC stack pid father\n");
#endif
read_lock(&tasklist_lock);
do_each_thread(g, p) {
@@ -4778,7 +4781,7 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
static inline void sched_init_granularity(void)
{
unsigned int factor = 1 + ilog2(num_online_cpus());
- const unsigned long gran_limit = 10000000;
+ const unsigned long gran_limit = 100000000;
sysctl_sched_granularity *= factor;
if (sysctl_sched_granularity > gran_limit)
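
prio_to_wmult[] stores 2^32/weight so that a division by the task weight can be done as a multiply and a 32-bit shift; the relabelled table also fixes two entries that had lost their leading digits (1717986 and 5244160 were listed as 717986 and 244160). A sketch of the arithmetic:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t weight = 1024;                  /* nice 0 weight */
        uint32_t wmult = (uint32_t)(4294967296ULL / weight); /* 4194304 */
        uint64_t delta = 3000000;                /* value to scale down */

        uint64_t by_div  = delta / weight;
        uint64_t by_mult = (delta * wmult) >> 32;

        /* prints "2929 2929": the shift-multiply replaces the divide */
        printf("%llu %llu\n", (unsigned long long)by_div,
               (unsigned long long)by_mult);
        return 0;
    }
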
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 1baf87cceb7c..29f2c21e7da2 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -171,7 +171,7 @@ static int sched_debug_show(struct seq_file *m, void *v)
u64 now = ktime_to_ns(ktime_get());
int cpu;
- SEQ_printf(m, "Sched Debug Version: v0.04, cfs-v20, %s %.*s\n",
+ SEQ_printf(m, "Sched Debug Version: v0.05, %s %.*s\n",
init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
init_utsname()->version);
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index c3391b6020e8..ad64fcb731f2 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -10,6 +10,7 @@
#include <linux/sched.h>
/* #define SECCOMP_DEBUG 1 */
+#define NR_SECCOMP_MODES 1
/*
* Secure computing mode 1 allows only read/write/exit/sigreturn.
@@ -54,3 +55,31 @@ void __secure_computing(int this_syscall)
#endif
do_exit(SIGKILL);
}
+
+long prctl_get_seccomp(void)
+{
+ return current->seccomp.mode;
+}
+
+long prctl_set_seccomp(unsigned long seccomp_mode)
+{
+ long ret;
+
+ /* can set it only once to be even more secure */
+ ret = -EPERM;
+ if (unlikely(current->seccomp.mode))
+ goto out;
+
+ ret = -EINVAL;
+ if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) {
+ current->seccomp.mode = seccomp_mode;
+ set_thread_flag(TIF_SECCOMP);
+#ifdef TIF_NOTSC
+ disable_TSC();
+#endif
+ ret = 0;
+ }
+
+ out:
+ return ret;
+}
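
With PR_SET_SECCOMP wired up in sys_prctl() below, mode-1 seccomp becomes reachable without the old /proc interface, and the mode can be set only once per task. A minimal user-space example (PR_GET_SECCOMP/PR_SET_SECCOMP are 21/22 in linux/prctl.h; the fallback defines are for older headers):

    #include <unistd.h>
    #include <sys/prctl.h>
    #include <sys/syscall.h>

    #ifndef PR_SET_SECCOMP
    #define PR_GET_SECCOMP  21
    #define PR_SET_SECCOMP  22
    #endif

    int main(void)
    {
        if (prctl(PR_SET_SECCOMP, 1) != 0)
            return 1;               /* EINVAL/EPERM, e.g. already set */
        /* Only read, write, exit and sigreturn are allowed now; any
         * other syscall draws SIGKILL. Use the raw exit syscall:
         * glibc's _exit() issues exit_group(), which mode 1 kills. */
        write(1, "sandboxed\n", 10);
        syscall(SYS_exit, 0);
        return 0;                   /* not reached */
    }
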
diff --git a/kernel/signal.c b/kernel/signal.c
index f9405609774e..39d122753bac 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -718,6 +718,37 @@ out_set:
#define LEGACY_QUEUE(sigptr, sig) \
(((sig) < SIGRTMIN) && sigismember(&(sigptr)->signal, (sig)))
+int print_fatal_signals;
+
+static void print_fatal_signal(struct pt_regs *regs, int signr)
+{
+ printk("%s/%d: potentially unexpected fatal signal %d.\n",
+ current->comm, current->pid, signr);
+
+#ifdef __i386__
+ printk("code at %08lx: ", regs->eip);
+ {
+ int i;
+ for (i = 0; i < 16; i++) {
+ unsigned char insn;
+
+ __get_user(insn, (unsigned char *)(regs->eip + i));
+ printk("%02x ", insn);
+ }
+ }
+#endif
+ printk("\n");
+ show_regs(regs);
+}
+
+static int __init setup_print_fatal_signals(char *str)
+{
+ get_option (&str, &print_fatal_signals);
+
+ return 1;
+}
+
+__setup("print-fatal-signals=", setup_print_fatal_signals);
static int
specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t)
@@ -1855,6 +1886,8 @@ relock:
* Anything else is fatal, maybe with a core dump.
*/
current->flags |= PF_SIGNALED;
+ if ((signr != SIGKILL) && print_fatal_signals)
+ print_fatal_signal(regs, signr);
if (sig_kernel_coredump(signr)) {
/*
* If it was able to dump core, this kills all
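
print_fatal_signal() dumps the 16 code bytes at the faulting EIP on i386, which often identifies the offending instruction without a debugger. The same dump technique in a runnable user-space sketch (reads the process's own text, so no __get_user is needed; the function-to-data pointer cast is implementation-defined but works on mainstream platforms):

    #include <stdio.h>

    int main(void)
    {
        /* Same technique as print_fatal_signal(): dump the 16 code
         * bytes at an instruction pointer, here our own main(). */
        const unsigned char *p = (const unsigned char *)main;
        int i;

        printf("code at %p: ", (const void *)p);
        for (i = 0; i < 16; i++)
            printf("%02x ", p[i]);
        printf("\n");
        return 0;
    }
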
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 73217a9e2875..8de267790166 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -614,12 +614,16 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb,
kthread_bind(per_cpu(ksoftirqd, hotcpu),
any_online_cpu(cpu_online_map));
case CPU_DEAD:
- case CPU_DEAD_FROZEN:
+ case CPU_DEAD_FROZEN: {
+ struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+
p = per_cpu(ksoftirqd, hotcpu);
per_cpu(ksoftirqd, hotcpu) = NULL;
+ sched_setscheduler(p, SCHED_FIFO, &param);
kthread_stop(p);
takeover_tasklets(hotcpu);
break;
+ }
#endif /* CONFIG_HOTPLUG_CPU */
}
return NOTIFY_OK;
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index fcee2a8e6da3..319821ef78af 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -93,10 +93,6 @@ static void stopmachine_set_state(enum stopmachine_state state)
static int stop_machine(void)
{
int i, ret = 0;
- struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
-
- /* One high-prio thread per cpu. We'll do this one. */
- sched_setscheduler(current, SCHED_FIFO, &param);
atomic_set(&stopmachine_thread_ack, 0);
stopmachine_num_threads = 0;
@@ -189,6 +185,10 @@ struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
p = kthread_create(do_stop, &smdata, "kstopmachine");
if (!IS_ERR(p)) {
+ struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+
+ /* One high-prio thread per cpu. We'll do this one. */
+ sched_setscheduler(p, SCHED_FIFO, &param);
kthread_bind(p, cpu);
wake_up_process(p);
wait_for_completion(&smdata.done);
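
Both the softirq and stop_machine changes move the sched_setscheduler() call so the priority is applied to the kthread itself rather than to whoever happened to call in. For reference, the user-space variant of the same API takes a pid (0 means the calling thread) and needs CAP_SYS_NICE:

    #include <sched.h>
    #include <stdio.h>

    int main(void)
    {
        struct sched_param param = {
            .sched_priority = sched_get_priority_max(SCHED_FIFO),
        };

        /* pid 0 applies the policy to the calling thread; raising to
         * SCHED_FIFO needs CAP_SYS_NICE (typically root). */
        if (sched_setscheduler(0, SCHED_FIFO, &param) != 0) {
            perror("sched_setscheduler");
            return 1;
        }
        puts("running SCHED_FIFO");
        return 0;
    }
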
diff --git a/kernel/sys.c b/kernel/sys.c
index 872271ccc384..4d141ae3e802 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -31,10 +31,12 @@
#include <linux/cn_proc.h>
#include <linux/getcpu.h>
#include <linux/task_io_accounting_ops.h>
+#include <linux/seccomp.h>
#include <linux/compat.h>
#include <linux/syscalls.h>
#include <linux/kprobes.h>
+#include <linux/user_namespace.h>
#include <asm/uaccess.h>
#include <asm/io.h>
@@ -1078,13 +1080,13 @@ static int set_user(uid_t new_ruid, int dumpclear)
{
struct user_struct *new_user;
- new_user = alloc_uid(new_ruid);
+ new_user = alloc_uid(current->nsproxy->user_ns, new_ruid);
if (!new_user)
return -EAGAIN;
if (atomic_read(&new_user->processes) >=
current->signal->rlim[RLIMIT_NPROC].rlim_cur &&
- new_user != &root_user) {
+ new_user != current->nsproxy->user_ns->root_user) {
free_uid(new_user);
return -EAGAIN;
}
@@ -2241,6 +2243,13 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
error = SET_ENDIAN(current, arg2);
break;
+ case PR_GET_SECCOMP:
+ error = prctl_get_seccomp();
+ break;
+ case PR_SET_SECCOMP:
+ error = prctl_set_seccomp(arg2);
+ break;
+
default:
error = -EINVAL;
break;
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 7e11e2c98bf9..b0ec498a18d9 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -14,6 +14,7 @@ asmlinkage long sys_ni_syscall(void)
cond_syscall(sys_nfsservctl);
cond_syscall(sys_quotactl);
+cond_syscall(sys32_quotactl);
cond_syscall(sys_acct);
cond_syscall(sys_lookup_dcookie);
cond_syscall(sys_swapon);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 51f5dac42a00..2ce7acf841ae 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -61,6 +61,7 @@ extern int proc_nr_files(ctl_table *table, int write, struct file *filp,
/* External variables not in a header file. */
extern int C_A_D;
+extern int print_fatal_signals;
extern int sysctl_overcommit_memory;
extern int sysctl_overcommit_ratio;
extern int sysctl_panic_on_oom;
@@ -202,7 +203,10 @@ static ctl_table root_table[] = {
.mode = 0555,
.child = dev_table,
},
-
+/*
+ * NOTE: do not add new entries to this table unless you have read
+ * Documentation/sysctl/ctl_unnumbered.txt
+ */
{ .ctl_name = 0 }
};
@@ -340,6 +344,14 @@ static ctl_table kern_table[] = {
.proc_handler = &proc_dointvec,
},
#endif
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "print-fatal-signals",
+ .data = &print_fatal_signals,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
#ifdef __sparc__
{
.ctl_name = KERN_SPARC_REBOOT,
@@ -949,6 +961,27 @@ static ctl_table vm_table[] = {
.strategy = &sysctl_jiffies,
},
#endif
+#ifdef CONFIG_SECURITY
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "mmap_min_addr",
+ .data = &mmap_min_addr,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = &proc_doulongvec_minmax,
+ },
+#ifdef CONFIG_NUMA
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "numa_zonelist_order",
+ .data = &numa_zonelist_order,
+ .maxlen = NUMA_ZONELIST_ORDER_LEN,
+ .mode = 0644,
+ .proc_handler = &numa_zonelist_order_handler,
+ .strategy = &sysctl_string,
+ },
+#endif
+#endif
#if defined(CONFIG_X86_32) || \
(defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
{
@@ -962,6 +995,10 @@ static ctl_table vm_table[] = {
.extra1 = &zero,
},
#endif
+/*
+ * NOTE: do not add new entries to this table unless you have read
+ * Documentation/sysctl/ctl_unnumbered.txt
+ */
{ .ctl_name = 0 }
};
@@ -1102,6 +1139,10 @@ static ctl_table fs_table[] = {
.child = binfmt_misc_table,
},
#endif
+/*
+ * NOTE: do not add new entries to this table unless you have read
+ * Documentation/sysctl/ctl_unnumbered.txt
+ */
{ .ctl_name = 0 }
};
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 906cae771585..059431ed67db 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -196,6 +196,8 @@ static int fill_pid(pid_t pid, struct task_struct *tsk,
/* fill in basic acct fields */
stats->version = TASKSTATS_VERSION;
+ stats->nvcsw = tsk->nvcsw;
+ stats->nivcsw = tsk->nivcsw;
bacct_add_tsk(stats, tsk);
/* fill in extended acct fields */
@@ -242,6 +244,8 @@ static int fill_tgid(pid_t tgid, struct task_struct *first,
*/
delayacct_add_tsk(stats, tsk);
+ stats->nvcsw += tsk->nvcsw;
+ stats->nivcsw += tsk->nivcsw;
} while_each_thread(first, tsk);
unlock_task_sighand(first, &flags);
diff --git a/kernel/time.c b/kernel/time.c
index f04791f69408..ffe19149d770 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -57,14 +57,17 @@ EXPORT_SYMBOL(sys_tz);
*/
asmlinkage long sys_time(time_t __user * tloc)
{
- time_t i;
- struct timeval tv;
+ /*
+ * We read xtime.tv_sec atomically - it's updated
+ * atomically by update_wall_time(), so no need to
+ * even read-lock the xtime seqlock:
+ */
+ time_t i = xtime.tv_sec;
- do_gettimeofday(&tv);
- i = tv.tv_sec;
+ smp_rmb(); /* sys_time() results are coherent */
if (tloc) {
- if (put_user(i,tloc))
+ if (put_user(i, tloc))
i = -EFAULT;
}
return i;
@@ -373,12 +376,25 @@ void do_gettimeofday (struct timeval *tv)
tv->tv_sec = sec;
tv->tv_usec = usec;
-}
+ /*
+ * Make sure xtime.tv_sec [returned by sys_time()] always
+ * follows the gettimeofday() result precisely. This
+ * condition is extremely unlikely, it can hit at most
+ * once per second:
+ */
+ if (unlikely(xtime.tv_sec != tv->tv_sec)) {
+ unsigned long flags;
+
+ write_seqlock_irqsave(&xtime_lock, flags);
+ update_wall_time();
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+ }
+}
EXPORT_SYMBOL(do_gettimeofday);
+#else /* CONFIG_TIME_INTERPOLATION */
-#else
#ifndef CONFIG_GENERIC_TIME
/*
* Simulate gettimeofday using do_gettimeofday which only allows a timeval
@@ -394,7 +410,7 @@ void getnstimeofday(struct timespec *tv)
}
EXPORT_SYMBOL_GPL(getnstimeofday);
#endif
-#endif
+#endif /* CONFIG_TIME_INTERPOLATION */
/* Converts Gregorian date to seconds since 1970-01-01 00:00:00.
* Assumes input in normal date format, i.e. 1980-12-31 23:59:59
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 76212b2a99de..2ad1c37b8dfe 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -205,47 +205,6 @@ void clockevents_exchange_device(struct clock_event_device *old,
}
/**
- * clockevents_request_device
- */
-struct clock_event_device *clockevents_request_device(unsigned int features,
- cpumask_t cpumask)
-{
- struct clock_event_device *cur, *dev = NULL;
- struct list_head *tmp;
-
- spin_lock(&clockevents_lock);
-
- list_for_each(tmp, &clockevent_devices) {
- cur = list_entry(tmp, struct clock_event_device, list);
-
- if ((cur->features & features) == features &&
- cpus_equal(cpumask, cur->cpumask)) {
- if (!dev || dev->rating < cur->rating)
- dev = cur;
- }
- }
-
- clockevents_exchange_device(NULL, dev);
-
- spin_unlock(&clockevents_lock);
-
- return dev;
-}
-
-/**
- * clockevents_release_device
- */
-void clockevents_release_device(struct clock_event_device *dev)
-{
- spin_lock(&clockevents_lock);
-
- clockevents_exchange_device(dev, NULL);
- clockevents_notify_released();
-
- spin_unlock(&clockevents_lock);
-}
-
-/**
* clockevents_notify - notification about relevant events
*/
void clockevents_notify(unsigned long reason, void *arg)
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index cf53bb5814cb..438c6b723ee2 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -13,7 +13,7 @@
#include <linux/timex.h>
#include <linux/jiffies.h>
#include <linux/hrtimer.h>
-
+#include <linux/capability.h>
#include <asm/div64.h>
#include <asm/timex.h>
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 3d1042f82a68..728cedfd3cbd 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -36,9 +36,17 @@ EXPORT_SYMBOL(xtime_lock);
* at zero at system boot time, so wall_to_monotonic will be negative;
* however, we will ALWAYS keep the tv_nsec part positive so we can use
* the usual normalization.
+ *
+ * wall_to_monotonic is adjusted after resume from suspend so that the
+ * monotonic time does not jump. Add total_sleep_time to wall_to_monotonic
+ * to get the real boot-based time offset.
+ *
+ * - wall_to_monotonic is no longer the boot time; getboottime must be
+ *   used instead.
*/
struct timespec xtime __attribute__ ((aligned (16)));
struct timespec wall_to_monotonic __attribute__ ((aligned (16)));
+static unsigned long total_sleep_time; /* seconds */
EXPORT_SYMBOL(xtime);
@@ -251,6 +259,7 @@ void __init timekeeping_init(void)
xtime.tv_nsec = 0;
set_normalized_timespec(&wall_to_monotonic,
-xtime.tv_sec, -xtime.tv_nsec);
+ total_sleep_time = 0;
write_sequnlock_irqrestore(&xtime_lock, flags);
}
@@ -282,6 +291,7 @@ static int timekeeping_resume(struct sys_device *dev)
xtime.tv_sec += sleep_length;
wall_to_monotonic.tv_sec -= sleep_length;
+ total_sleep_time += sleep_length;
}
/* re-base the last cycle value */
clock->cycle_last = clocksource_read(clock);
@@ -476,3 +486,30 @@ void update_wall_time(void)
change_clocksource();
update_vsyscall(&xtime, clock);
}
+
+/**
+ * getboottime - Return the real time of system boot.
+ * @ts: pointer to the timespec to be set
+ *
+ * Returns the boot time in a timespec.
+ *
+ * This is based on the wall_to_monotonic offset and the total suspend
+ * time. Calls to settimeofday will affect the value returned (which
+ * basically means that however wrong your real time clock is at boot time,
+ * you get the right time here).
+ */
+void getboottime(struct timespec *ts)
+{
+ set_normalized_timespec(ts,
+ - (wall_to_monotonic.tv_sec + total_sleep_time),
+ - wall_to_monotonic.tv_nsec);
+}
+
+/**
+ * monotonic_to_bootbased - Convert the monotonic time to boot based.
+ * @ts: pointer to the timespec to be converted
+ */
+void monotonic_to_bootbased(struct timespec *ts)
+{
+ ts->tv_sec += total_sleep_time;
+}
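A hedged sketch of how a caller might combine the two new helpers; the monotonic timestamp source shown is illustrative:

	struct timespec boot, ts;

	getboottime(&boot);		/* real time of system boot */

	ktime_get_ts(&ts);		/* a monotonic timestamp */
	monotonic_to_bootbased(&ts);	/* now also counts suspended time */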
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c
index 321693724ad7..9b8a826236dd 100644
--- a/kernel/time/timer_stats.c
+++ b/kernel/time/timer_stats.c
@@ -68,6 +68,7 @@ struct entry {
* Number of timeout events:
*/
unsigned long count;
+ unsigned int timer_flag;
/*
* We save the command-line string to preserve
@@ -231,7 +232,8 @@ static struct entry *tstat_lookup(struct entry *entry, char *comm)
* incremented. Otherwise the timer is registered in a free slot.
*/
void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
- void *timerf, char * comm)
+ void *timerf, char *comm,
+ unsigned int timer_flag)
{
/*
	 * It doesn't matter which lock we take:
@@ -249,6 +251,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
input.start_func = startf;
input.expire_func = timerf;
input.pid = pid;
+ input.timer_flag = timer_flag;
spin_lock_irqsave(lock, flags);
if (!active)
@@ -295,7 +298,7 @@ static int tstats_show(struct seq_file *m, void *v)
period = ktime_to_timespec(time);
ms = period.tv_nsec / 1000000;
- seq_puts(m, "Timer Stats Version: v0.1\n");
+ seq_puts(m, "Timer Stats Version: v0.2\n");
seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms);
if (atomic_read(&overflow_count))
seq_printf(m, "Overflow: %d entries\n",
@@ -303,8 +306,13 @@ static int tstats_show(struct seq_file *m, void *v)
for (i = 0; i < nr_entries; i++) {
entry = entries + i;
- seq_printf(m, "%4lu, %5d %-16s ",
+ if (entry->timer_flag & TIMER_STATS_FLAG_DEFERRABLE) {
+ seq_printf(m, "%4luD, %5d %-16s ",
entry->count, entry->pid, entry->comm);
+ } else {
+ seq_printf(m, " %4lu, %5d %-16s ",
+ entry->count, entry->pid, entry->comm);
+ }
print_name_offset(m, (unsigned long)entry->start_func);
seq_puts(m, " (");
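With the v0.2 format a deferrable timer's event count is suffixed with "D", while normal timers gain a leading space so the columns still line up. Illustrative output (all values invented):

	Timer Stats Version: v0.2
	Sample period: 3.888 s
	  12D,     0 swapper          queue_delayed_work_on (delayed_work_timer_fn)
	   15,   739 hald             schedule_timeout (process_timeout)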
diff --git a/kernel/timer.c b/kernel/timer.c
index 1a69705c2fb9..1258371e0d2b 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -305,6 +305,20 @@ void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
timer->start_pid = current->pid;
}
+
+static void timer_stats_account_timer(struct timer_list *timer)
+{
+ unsigned int flag = 0;
+
+ if (unlikely(tbase_get_deferrable(timer->base)))
+ flag |= TIMER_STATS_FLAG_DEFERRABLE;
+
+ timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
+ timer->function, timer->start_comm, flag);
+}
+
+#else
+static void timer_stats_account_timer(struct timer_list *timer) {}
#endif
/**
@@ -1114,6 +1128,7 @@ int do_sysinfo(struct sysinfo *info)
getnstimeofday(&tp);
tp.tv_sec += wall_to_monotonic.tv_sec;
tp.tv_nsec += wall_to_monotonic.tv_nsec;
+ monotonic_to_bootbased(&tp);
if (tp.tv_nsec - NSEC_PER_SEC >= 0) {
tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC;
tp.tv_sec++;
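Because do_sysinfo() now runs the converted time through monotonic_to_bootbased(), the reported uptime includes time spent in suspend. A minimal user-space check via the standard sysinfo(2) call:

	#include <stdio.h>
	#include <sys/sysinfo.h>

	int main(void)
	{
		struct sysinfo si;

		if (sysinfo(&si) == 0)
			printf("uptime: %ld s (boot-based, includes suspend)\n",
			       si.uptime);
		return 0;
	}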
diff --git a/kernel/user.c b/kernel/user.c
index 4869563080e9..98b82507797a 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -14,20 +14,19 @@
#include <linux/bitops.h>
#include <linux/key.h>
#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/user_namespace.h>
/*
* UID task count cache, to get fast user lookup in "alloc_uid"
* when changing user IDs (i.e. setuid() and friends).
*/
-#define UIDHASH_BITS (CONFIG_BASE_SMALL ? 3 : 8)
-#define UIDHASH_SZ (1 << UIDHASH_BITS)
#define UIDHASH_MASK (UIDHASH_SZ - 1)
#define __uidhashfn(uid) (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK)
-#define uidhashentry(uid) (uidhash_table + __uidhashfn((uid)))
+#define uidhashentry(ns, uid) ((ns)->uidhash_table + __uidhashfn((uid)))
static struct kmem_cache *uid_cachep;
-static struct list_head uidhash_table[UIDHASH_SZ];
/*
* The uidhash_lock is mostly taken from process context, but it is
@@ -94,9 +93,10 @@ struct user_struct *find_user(uid_t uid)
{
struct user_struct *ret;
unsigned long flags;
+ struct user_namespace *ns = current->nsproxy->user_ns;
spin_lock_irqsave(&uidhash_lock, flags);
- ret = uid_hash_find(uid, uidhashentry(uid));
+ ret = uid_hash_find(uid, uidhashentry(ns, uid));
spin_unlock_irqrestore(&uidhash_lock, flags);
return ret;
}
@@ -120,9 +120,9 @@ void free_uid(struct user_struct *up)
}
}
-struct user_struct * alloc_uid(uid_t uid)
+struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
{
- struct list_head *hashent = uidhashentry(uid);
+ struct list_head *hashent = uidhashentry(ns, uid);
struct user_struct *up;
spin_lock_irq(&uidhash_lock);
@@ -211,11 +211,11 @@ static int __init uid_cache_init(void)
0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
for(n = 0; n < UIDHASH_SZ; ++n)
- INIT_LIST_HEAD(uidhash_table + n);
+ INIT_LIST_HEAD(init_user_ns.uidhash_table + n);
/* Insert the root user immediately (init already runs as root) */
spin_lock_irq(&uidhash_lock);
- uid_hash_insert(&root_user, uidhashentry(0));
+ uid_hash_insert(&root_user, uidhashentry(&init_user_ns, 0));
spin_unlock_irq(&uidhash_lock);
return 0;
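Callers of alloc_uid() now resolve the namespace first, mirroring what find_user() does above; a hedged sketch of the updated call pattern (error handling illustrative):

	struct user_namespace *ns = current->nsproxy->user_ns;
	struct user_struct *up;

	up = alloc_uid(ns, uid);
	if (!up)
		return -EAGAIN;		/* illustrative error path */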
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
new file mode 100644
index 000000000000..d055d987850c
--- /dev/null
+++ b/kernel/user_namespace.c
@@ -0,0 +1,87 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/nsproxy.h>
+#include <linux/user_namespace.h>
+
+struct user_namespace init_user_ns = {
+ .kref = {
+ .refcount = ATOMIC_INIT(2),
+ },
+ .root_user = &root_user,
+};
+
+EXPORT_SYMBOL_GPL(init_user_ns);
+
+#ifdef CONFIG_USER_NS
+
+/*
+ * Clone a new ns copying an original user ns, setting refcount to 1
+ * @old_ns: namespace to clone
+ * Return ERR_PTR(-ENOMEM) on error (failure to kmalloc), new ns otherwise
+ */
+static struct user_namespace *clone_user_ns(struct user_namespace *old_ns)
+{
+ struct user_namespace *ns;
+ struct user_struct *new_user;
+ int n;
+
+ ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL);
+ if (!ns)
+ return ERR_PTR(-ENOMEM);
+
+ kref_init(&ns->kref);
+
+ for (n = 0; n < UIDHASH_SZ; ++n)
+ INIT_LIST_HEAD(ns->uidhash_table + n);
+
+ /* Insert new root user. */
+ ns->root_user = alloc_uid(ns, 0);
+ if (!ns->root_user) {
+ kfree(ns);
+ return ERR_PTR(-ENOMEM);
+ }
+
+	/* Replace current->user with the new one */
+ new_user = alloc_uid(ns, current->uid);
+ if (!new_user) {
+ free_uid(ns->root_user);
+ kfree(ns);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ switch_uid(new_user);
+ return ns;
+}
+
+struct user_namespace * copy_user_ns(int flags, struct user_namespace *old_ns)
+{
+ struct user_namespace *new_ns;
+
+ BUG_ON(!old_ns);
+ get_user_ns(old_ns);
+
+ if (!(flags & CLONE_NEWUSER))
+ return old_ns;
+
+ new_ns = clone_user_ns(old_ns);
+
+ put_user_ns(old_ns);
+ return new_ns;
+}
+
+void free_user_ns(struct kref *kref)
+{
+ struct user_namespace *ns;
+
+ ns = container_of(kref, struct user_namespace, kref);
+ kfree(ns);
+}
+
+#endif /* CONFIG_USER_NS */
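From user space, a task enters a fresh user namespace via clone(2) with CLONE_NEWUSER. A minimal sketch, assuming the caller holds the privilege the flag requires at this stage:

	#define _GNU_SOURCE
	#include <sched.h>
	#include <signal.h>
	#include <stdio.h>
	#include <sys/wait.h>
	#include <unistd.h>

	static char stack[64 * 1024];

	static int child(void *arg)
	{
		/* runs against the freshly cloned user_namespace */
		printf("child uid: %d\n", (int)getuid());
		return 0;
	}

	int main(void)
	{
		pid_t pid = clone(child, stack + sizeof(stack),
				  CLONE_NEWUSER | SIGCHLD, NULL);

		if (pid < 0)
			perror("clone");
		else
			waitpid(pid, NULL, 0);
		return 0;
	}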
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 160c8c5136bd..9d8180a0f0d8 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -13,6 +13,7 @@
#include <linux/uts.h>
#include <linux/utsname.h>
#include <linux/version.h>
+#include <linux/err.h>
/*
* Clone a new ns copying an original utsname, setting refcount to 1
@@ -24,10 +25,11 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns)
struct uts_namespace *ns;
ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL);
- if (ns) {
- memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
- kref_init(&ns->kref);
- }
+ if (!ns)
+ return ERR_PTR(-ENOMEM);
+
+ memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
+ kref_init(&ns->kref);
return ns;
}
@@ -37,7 +39,7 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns)
* utsname of this process won't be seen by parent, and vice
* versa.
*/
-struct uts_namespace *copy_utsname(int flags, struct uts_namespace *old_ns)
+struct uts_namespace *copy_utsname(unsigned long flags, struct uts_namespace *old_ns)
{
struct uts_namespace *new_ns;
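Since clone_uts_ns() now reports failure as ERR_PTR(-ENOMEM) instead of NULL, callers are expected to test with IS_ERR(); a hedged sketch of the pattern:

	new_ns = clone_uts_ns(old_ns);
	if (IS_ERR(new_ns))
		return new_ns;		/* propagate the ERR_PTR */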
diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c
index f22b9dbd2a9c..c76c06466bfd 100644
--- a/kernel/utsname_sysctl.c
+++ b/kernel/utsname_sysctl.c
@@ -18,10 +18,7 @@
static void *get_uts(ctl_table *table, int write)
{
char *which = table->data;
-#ifdef CONFIG_UTS_NS
- struct uts_namespace *uts_ns = current->nsproxy->uts_ns;
- which = (which - (char *)&init_uts_ns) + (char *)uts_ns;
-#endif
+
if (!write)
down_read(&uts_sem);
else
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 3bebf73be976..d7d3fa3072e5 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -382,16 +382,16 @@ void fastcall flush_workqueue(struct workqueue_struct *wq)
EXPORT_SYMBOL_GPL(flush_workqueue);
/*
- * Upon a successful return, the caller "owns" WORK_STRUCT_PENDING bit,
+ * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit,
* so this work can't be re-armed in any way.
*/
static int try_to_grab_pending(struct work_struct *work)
{
struct cpu_workqueue_struct *cwq;
- int ret = 0;
+ int ret = -1;
if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work)))
- return 1;
+ return 0;
/*
* The queueing is in progress, or it is already queued. Try to
@@ -457,10 +457,28 @@ static void wait_on_work(struct work_struct *work)
wait_on_cpu_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
}
+static int __cancel_work_timer(struct work_struct *work,
+			       struct timer_list *timer)
+{
+ int ret;
+
+ do {
+ ret = (timer && likely(del_timer(timer)));
+ if (!ret)
+ ret = try_to_grab_pending(work);
+ wait_on_work(work);
+ } while (unlikely(ret < 0));
+
+ work_clear_pending(work);
+ return ret;
+}
+
/**
* cancel_work_sync - block until a work_struct's callback has terminated
* @work: the work which is to be flushed
*
+ * Returns true if @work was pending.
+ *
* cancel_work_sync() will cancel the work if it is queued. If the work's
* callback appears to be running, cancel_work_sync() will block until it
* has completed.
@@ -476,31 +494,26 @@ static void wait_on_work(struct work_struct *work)
* The caller must ensure that workqueue_struct on which this work was last
* queued can't be destroyed before this function returns.
*/
-void cancel_work_sync(struct work_struct *work)
+int cancel_work_sync(struct work_struct *work)
{
- while (!try_to_grab_pending(work))
- cpu_relax();
- wait_on_work(work);
- work_clear_pending(work);
+ return __cancel_work_timer(work, NULL);
}
EXPORT_SYMBOL_GPL(cancel_work_sync);
/**
- * cancel_rearming_delayed_work - reliably kill off a delayed work.
+ * cancel_delayed_work_sync - reliably kill off a delayed work.
* @dwork: the delayed work struct
*
+ * Returns true if @dwork was pending.
+ *
* It is possible to use this function if @dwork rearms itself via queue_work()
* or queue_delayed_work(). See also the comment for cancel_work_sync().
*/
-void cancel_rearming_delayed_work(struct delayed_work *dwork)
+int cancel_delayed_work_sync(struct delayed_work *dwork)
{
- while (!del_timer(&dwork->timer) &&
- !try_to_grab_pending(&dwork->work))
- cpu_relax();
- wait_on_work(&dwork->work);
- work_clear_pending(&dwork->work);
+ return __cancel_work_timer(&dwork->work, &dwork->timer);
}
-EXPORT_SYMBOL(cancel_rearming_delayed_work);
+EXPORT_SYMBOL(cancel_delayed_work_sync);
static struct workqueue_struct *keventd_wq __read_mostly;
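The boolean-style return lets teardown paths tell whether anything was actually cancelled; a hedged driver-style sketch (my_dev and its dwork member are illustrative):

	static void my_dev_shutdown(struct my_dev *dev)
	{
		/* true => the delayed work was still pending */
		if (cancel_delayed_work_sync(&dev->dwork))
			dev->cancelled++;	/* illustrative bookkeeping */
		/* either way, the callback is not running on return */
	}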