Diffstat (limited to 'kernel')

-rw-r--r--  kernel/fork.c                           479
-rw-r--r--  kernel/irq/autoprobe.c                    2
-rw-r--r--  kernel/irq/chip.c                         6
-rw-r--r--  kernel/irq/debug.h                       14
-rw-r--r--  kernel/irq/internals.h                    2
-rw-r--r--  kernel/kallsyms.c                        46
-rw-r--r--  kernel/module.c                           6
-rw-r--r--  kernel/power/power.h                      3
-rw-r--r--  kernel/printk/printk.c                  215
-rw-r--r--  kernel/sched/autogroup.c                  5
-rw-r--r--  kernel/sysctl.c                           7
-rw-r--r--  kernel/trace/ftrace.c                     2
-rw-r--r--  kernel/trace/trace.c                     14
-rw-r--r--  kernel/trace/trace_events.c               2
-rw-r--r--  kernel/trace/trace_selftest_dynamic.c     5
-rw-r--r--  kernel/trace/trace_uprobe.c               2
-rw-r--r--  kernel/workqueue.c                        1

17 files changed, 485 insertions, 326 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 2295fc69717f..5c372c954f3b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -77,6 +77,7 @@
#include <linux/blkdev.h>
#include <linux/fs_struct.h>
#include <linux/magic.h>
+#include <linux/sched/mm.h>
#include <linux/perf_event.h>
#include <linux/posix-timers.h>
#include <linux/user-return-notifier.h>
@@ -282,8 +283,9 @@ static void free_thread_stack(struct task_struct *tsk)
void thread_stack_cache_init(void)
{
- thread_stack_cache = kmem_cache_create("thread_stack", THREAD_SIZE,
- THREAD_SIZE, 0, NULL);
+ thread_stack_cache = kmem_cache_create_usercopy("thread_stack",
+ THREAD_SIZE, THREAD_SIZE, 0, 0,
+ THREAD_SIZE, NULL);
BUG_ON(thread_stack_cache == NULL);
}
# endif
@@ -390,6 +392,241 @@ void free_task(struct task_struct *tsk)
}
EXPORT_SYMBOL(free_task);
+#ifdef CONFIG_MMU
+static __latent_entropy int dup_mmap(struct mm_struct *mm,
+ struct mm_struct *oldmm)
+{
+ struct vm_area_struct *mpnt, *tmp, *prev, **pprev;
+ struct rb_node **rb_link, *rb_parent;
+ int retval;
+ unsigned long charge;
+ LIST_HEAD(uf);
+
+ uprobe_start_dup_mmap();
+ if (down_write_killable(&oldmm->mmap_sem)) {
+ retval = -EINTR;
+ goto fail_uprobe_end;
+ }
+ flush_cache_dup_mm(oldmm);
+ uprobe_dup_mmap(oldmm, mm);
+ /*
+ * Not linked in yet - no deadlock potential:
+ */
+ down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
+
+ /* No ordering required: file already has been exposed. */
+ RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
+
+ mm->total_vm = oldmm->total_vm;
+ mm->data_vm = oldmm->data_vm;
+ mm->exec_vm = oldmm->exec_vm;
+ mm->stack_vm = oldmm->stack_vm;
+
+ rb_link = &mm->mm_rb.rb_node;
+ rb_parent = NULL;
+ pprev = &mm->mmap;
+ retval = ksm_fork(mm, oldmm);
+ if (retval)
+ goto out;
+ retval = khugepaged_fork(mm, oldmm);
+ if (retval)
+ goto out;
+
+ prev = NULL;
+ for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
+ struct file *file;
+
+ if (mpnt->vm_flags & VM_DONTCOPY) {
+ vm_stat_account(mm, mpnt->vm_flags, -vma_pages(mpnt));
+ continue;
+ }
+ charge = 0;
+ if (mpnt->vm_flags & VM_ACCOUNT) {
+ unsigned long len = vma_pages(mpnt);
+
+ if (security_vm_enough_memory_mm(oldmm, len)) /* sic */
+ goto fail_nomem;
+ charge = len;
+ }
+ tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+ if (!tmp)
+ goto fail_nomem;
+ *tmp = *mpnt;
+ INIT_LIST_HEAD(&tmp->anon_vma_chain);
+ retval = vma_dup_policy(mpnt, tmp);
+ if (retval)
+ goto fail_nomem_policy;
+ tmp->vm_mm = mm;
+ retval = dup_userfaultfd(tmp, &uf);
+ if (retval)
+ goto fail_nomem_anon_vma_fork;
+ if (tmp->vm_flags & VM_WIPEONFORK) {
+ /* VM_WIPEONFORK gets a clean slate in the child. */
+ tmp->anon_vma = NULL;
+ if (anon_vma_prepare(tmp))
+ goto fail_nomem_anon_vma_fork;
+ } else if (anon_vma_fork(tmp, mpnt))
+ goto fail_nomem_anon_vma_fork;
+ tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT);
+ tmp->vm_next = tmp->vm_prev = NULL;
+ file = tmp->vm_file;
+ if (file) {
+ struct inode *inode = file_inode(file);
+ struct address_space *mapping = file->f_mapping;
+
+ get_file(file);
+ if (tmp->vm_flags & VM_DENYWRITE)
+ atomic_dec(&inode->i_writecount);
+ i_mmap_lock_write(mapping);
+ if (tmp->vm_flags & VM_SHARED)
+ atomic_inc(&mapping->i_mmap_writable);
+ flush_dcache_mmap_lock(mapping);
+ /* insert tmp into the share list, just after mpnt */
+ vma_interval_tree_insert_after(tmp, mpnt,
+ &mapping->i_mmap);
+ flush_dcache_mmap_unlock(mapping);
+ i_mmap_unlock_write(mapping);
+ }
+
+ /*
+ * Clear hugetlb-related page reserves for children. This only
+ * affects MAP_PRIVATE mappings. Faults generated by the child
+ * are not guaranteed to succeed, even if read-only
+ */
+ if (is_vm_hugetlb_page(tmp))
+ reset_vma_resv_huge_pages(tmp);
+
+ /*
+ * Link in the new vma and copy the page table entries.
+ */
+ *pprev = tmp;
+ pprev = &tmp->vm_next;
+ tmp->vm_prev = prev;
+ prev = tmp;
+
+ __vma_link_rb(mm, tmp, rb_link, rb_parent);
+ rb_link = &tmp->vm_rb.rb_right;
+ rb_parent = &tmp->vm_rb;
+
+ mm->map_count++;
+ if (!(tmp->vm_flags & VM_WIPEONFORK))
+ retval = copy_page_range(mm, oldmm, mpnt);
+
+ if (tmp->vm_ops && tmp->vm_ops->open)
+ tmp->vm_ops->open(tmp);
+
+ if (retval)
+ goto out;
+ }
+ /* a new mm has just been created */
+ arch_dup_mmap(oldmm, mm);
+ retval = 0;
+out:
+ up_write(&mm->mmap_sem);
+ flush_tlb_mm(oldmm);
+ up_write(&oldmm->mmap_sem);
+ dup_userfaultfd_complete(&uf);
+fail_uprobe_end:
+ uprobe_end_dup_mmap();
+ return retval;
+fail_nomem_anon_vma_fork:
+ mpol_put(vma_policy(tmp));
+fail_nomem_policy:
+ kmem_cache_free(vm_area_cachep, tmp);
+fail_nomem:
+ retval = -ENOMEM;
+ vm_unacct_memory(charge);
+ goto out;
+}
+
+static inline int mm_alloc_pgd(struct mm_struct *mm)
+{
+ mm->pgd = pgd_alloc(mm);
+ if (unlikely(!mm->pgd))
+ return -ENOMEM;
+ return 0;
+}
+
+static inline void mm_free_pgd(struct mm_struct *mm)
+{
+ pgd_free(mm, mm->pgd);
+}
+#else
+static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
+{
+ down_write(&oldmm->mmap_sem);
+ RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
+ up_write(&oldmm->mmap_sem);
+ return 0;
+}
+#define mm_alloc_pgd(mm) (0)
+#define mm_free_pgd(mm)
+#endif /* CONFIG_MMU */
+
+static void check_mm(struct mm_struct *mm)
+{
+ int i;
+
+ for (i = 0; i < NR_MM_COUNTERS; i++) {
+ long x = atomic_long_read(&mm->rss_stat.count[i]);
+
+ if (unlikely(x))
+ printk(KERN_ALERT "BUG: Bad rss-counter state "
+ "mm:%p idx:%d val:%ld\n", mm, i, x);
+ }
+
+ if (mm_pgtables_bytes(mm))
+ pr_alert("BUG: non-zero pgtables_bytes on freeing mm: %ld\n",
+ mm_pgtables_bytes(mm));
+
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
+ VM_BUG_ON_MM(mm->pmd_huge_pte, mm);
+#endif
+}
+
+#define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL))
+#define free_mm(mm) (kmem_cache_free(mm_cachep, (mm)))
+
+/*
+ * Called when the last reference to the mm
+ * is dropped: either by a lazy thread or by
+ * mmput. Free the page directory and the mm.
+ */
+static void __mmdrop(struct mm_struct *mm)
+{
+ BUG_ON(mm == &init_mm);
+ mm_free_pgd(mm);
+ destroy_context(mm);
+ hmm_mm_destroy(mm);
+ mmu_notifier_mm_destroy(mm);
+ check_mm(mm);
+ put_user_ns(mm->user_ns);
+ free_mm(mm);
+}
+
+void mmdrop(struct mm_struct *mm)
+{
+ if (unlikely(atomic_dec_and_test(&mm->mm_count)))
+ __mmdrop(mm);
+}
+EXPORT_SYMBOL_GPL(mmdrop);
+
+static void mmdrop_async_fn(struct work_struct *work)
+{
+ struct mm_struct *mm;
+
+ mm = container_of(work, struct mm_struct, async_put_work);
+ __mmdrop(mm);
+}
+
+static void mmdrop_async(struct mm_struct *mm)
+{
+ if (unlikely(atomic_dec_and_test(&mm->mm_count))) {
+ INIT_WORK(&mm->async_put_work, mmdrop_async_fn);
+ schedule_work(&mm->async_put_work);
+ }
+}
+
static inline void free_signal_struct(struct signal_struct *sig)
{
taskstats_tgid_free(sig);
@@ -457,6 +694,21 @@ static void set_max_threads(unsigned int max_threads_suggested)
int arch_task_struct_size __read_mostly;
#endif
+static void task_struct_whitelist(unsigned long *offset, unsigned long *size)
+{
+ /* Fetch thread_struct whitelist for the architecture. */
+ arch_thread_struct_whitelist(offset, size);
+
+ /*
+ * Handle zero-sized whitelist or empty thread_struct, otherwise
+ * adjust offset to position of thread_struct in task_struct.
+ */
+ if (unlikely(*size == 0))
+ *offset = 0;
+ else
+ *offset += offsetof(struct task_struct, thread);
+}
+
void __init fork_init(void)
{
int i;
@@ -465,11 +717,14 @@ void __init fork_init(void)
#define ARCH_MIN_TASKALIGN 0
#endif
int align = max_t(int, L1_CACHE_BYTES, ARCH_MIN_TASKALIGN);
+ unsigned long useroffset, usersize;
/* create a slab on which task_structs can be allocated */
- task_struct_cachep = kmem_cache_create("task_struct",
+ task_struct_whitelist(&useroffset, &usersize);
+ task_struct_cachep = kmem_cache_create_usercopy("task_struct",
arch_task_struct_size, align,
- SLAB_PANIC|SLAB_ACCOUNT, NULL);
+ SLAB_PANIC|SLAB_ACCOUNT,
+ useroffset, usersize, NULL);
#endif
/* do the arch specific task caches init */
@@ -594,181 +849,8 @@ free_tsk:
return NULL;
}
-#ifdef CONFIG_MMU
-static __latent_entropy int dup_mmap(struct mm_struct *mm,
- struct mm_struct *oldmm)
-{
- struct vm_area_struct *mpnt, *tmp, *prev, **pprev;
- struct rb_node **rb_link, *rb_parent;
- int retval;
- unsigned long charge;
- LIST_HEAD(uf);
-
- uprobe_start_dup_mmap();
- if (down_write_killable(&oldmm->mmap_sem)) {
- retval = -EINTR;
- goto fail_uprobe_end;
- }
- flush_cache_dup_mm(oldmm);
- uprobe_dup_mmap(oldmm, mm);
- /*
- * Not linked in yet - no deadlock potential:
- */
- down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
-
- /* No ordering required: file already has been exposed. */
- RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
-
- mm->total_vm = oldmm->total_vm;
- mm->data_vm = oldmm->data_vm;
- mm->exec_vm = oldmm->exec_vm;
- mm->stack_vm = oldmm->stack_vm;
-
- rb_link = &mm->mm_rb.rb_node;
- rb_parent = NULL;
- pprev = &mm->mmap;
- retval = ksm_fork(mm, oldmm);
- if (retval)
- goto out;
- retval = khugepaged_fork(mm, oldmm);
- if (retval)
- goto out;
-
- prev = NULL;
- for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
- struct file *file;
-
- if (mpnt->vm_flags & VM_DONTCOPY) {
- vm_stat_account(mm, mpnt->vm_flags, -vma_pages(mpnt));
- continue;
- }
- charge = 0;
- if (mpnt->vm_flags & VM_ACCOUNT) {
- unsigned long len = vma_pages(mpnt);
-
- if (security_vm_enough_memory_mm(oldmm, len)) /* sic */
- goto fail_nomem;
- charge = len;
- }
- tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
- if (!tmp)
- goto fail_nomem;
- *tmp = *mpnt;
- INIT_LIST_HEAD(&tmp->anon_vma_chain);
- retval = vma_dup_policy(mpnt, tmp);
- if (retval)
- goto fail_nomem_policy;
- tmp->vm_mm = mm;
- retval = dup_userfaultfd(tmp, &uf);
- if (retval)
- goto fail_nomem_anon_vma_fork;
- if (tmp->vm_flags & VM_WIPEONFORK) {
- /* VM_WIPEONFORK gets a clean slate in the child. */
- tmp->anon_vma = NULL;
- if (anon_vma_prepare(tmp))
- goto fail_nomem_anon_vma_fork;
- } else if (anon_vma_fork(tmp, mpnt))
- goto fail_nomem_anon_vma_fork;
- tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT);
- tmp->vm_next = tmp->vm_prev = NULL;
- file = tmp->vm_file;
- if (file) {
- struct inode *inode = file_inode(file);
- struct address_space *mapping = file->f_mapping;
-
- get_file(file);
- if (tmp->vm_flags & VM_DENYWRITE)
- atomic_dec(&inode->i_writecount);
- i_mmap_lock_write(mapping);
- if (tmp->vm_flags & VM_SHARED)
- atomic_inc(&mapping->i_mmap_writable);
- flush_dcache_mmap_lock(mapping);
- /* insert tmp into the share list, just after mpnt */
- vma_interval_tree_insert_after(tmp, mpnt,
- &mapping->i_mmap);
- flush_dcache_mmap_unlock(mapping);
- i_mmap_unlock_write(mapping);
- }
-
- /*
- * Clear hugetlb-related page reserves for children. This only
- * affects MAP_PRIVATE mappings. Faults generated by the child
- * are not guaranteed to succeed, even if read-only
- */
- if (is_vm_hugetlb_page(tmp))
- reset_vma_resv_huge_pages(tmp);
-
- /*
- * Link in the new vma and copy the page table entries.
- */
- *pprev = tmp;
- pprev = &tmp->vm_next;
- tmp->vm_prev = prev;
- prev = tmp;
-
- __vma_link_rb(mm, tmp, rb_link, rb_parent);
- rb_link = &tmp->vm_rb.rb_right;
- rb_parent = &tmp->vm_rb;
-
- mm->map_count++;
- if (!(tmp->vm_flags & VM_WIPEONFORK))
- retval = copy_page_range(mm, oldmm, mpnt);
-
- if (tmp->vm_ops && tmp->vm_ops->open)
- tmp->vm_ops->open(tmp);
-
- if (retval)
- goto out;
- }
- /* a new mm has just been created */
- retval = arch_dup_mmap(oldmm, mm);
-out:
- up_write(&mm->mmap_sem);
- flush_tlb_mm(oldmm);
- up_write(&oldmm->mmap_sem);
- dup_userfaultfd_complete(&uf);
-fail_uprobe_end:
- uprobe_end_dup_mmap();
- return retval;
-fail_nomem_anon_vma_fork:
- mpol_put(vma_policy(tmp));
-fail_nomem_policy:
- kmem_cache_free(vm_area_cachep, tmp);
-fail_nomem:
- retval = -ENOMEM;
- vm_unacct_memory(charge);
- goto out;
-}
-
-static inline int mm_alloc_pgd(struct mm_struct *mm)
-{
- mm->pgd = pgd_alloc(mm);
- if (unlikely(!mm->pgd))
- return -ENOMEM;
- return 0;
-}
-
-static inline void mm_free_pgd(struct mm_struct *mm)
-{
- pgd_free(mm, mm->pgd);
-}
-#else
-static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
-{
- down_write(&oldmm->mmap_sem);
- RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
- up_write(&oldmm->mmap_sem);
- return 0;
-}
-#define mm_alloc_pgd(mm) (0)
-#define mm_free_pgd(mm)
-#endif /* CONFIG_MMU */
-
__cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);
-#define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL))
-#define free_mm(mm) (kmem_cache_free(mm_cachep, (mm)))
-
static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT;
static int __init coredump_filter_setup(char *s)
@@ -858,27 +940,6 @@ fail_nopgd:
return NULL;
}
-static void check_mm(struct mm_struct *mm)
-{
- int i;
-
- for (i = 0; i < NR_MM_COUNTERS; i++) {
- long x = atomic_long_read(&mm->rss_stat.count[i]);
-
- if (unlikely(x))
- printk(KERN_ALERT "BUG: Bad rss-counter state "
- "mm:%p idx:%d val:%ld\n", mm, i, x);
- }
-
- if (mm_pgtables_bytes(mm))
- pr_alert("BUG: non-zero pgtables_bytes on freeing mm: %ld\n",
- mm_pgtables_bytes(mm));
-
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
- VM_BUG_ON_MM(mm->pmd_huge_pte, mm);
-#endif
-}
-
/*
* Allocate and initialize an mm_struct.
*/
@@ -894,24 +955,6 @@ struct mm_struct *mm_alloc(void)
return mm_init(mm, current, current_user_ns());
}
-/*
- * Called when the last reference to the mm
- * is dropped: either by a lazy thread or by
- * mmput. Free the page directory and the mm.
- */
-void __mmdrop(struct mm_struct *mm)
-{
- BUG_ON(mm == &init_mm);
- mm_free_pgd(mm);
- destroy_context(mm);
- hmm_mm_destroy(mm);
- mmu_notifier_mm_destroy(mm);
- check_mm(mm);
- put_user_ns(mm->user_ns);
- free_mm(mm);
-}
-EXPORT_SYMBOL_GPL(__mmdrop);
-
static inline void __mmput(struct mm_struct *mm)
{
VM_BUG_ON(atomic_read(&mm->mm_users));
@@ -2224,9 +2267,11 @@ void __init proc_caches_init(void)
* maximum number of CPU's we can ever have. The cpumask_allocation
* is at the end of the structure, exactly for that reason.
*/
- mm_cachep = kmem_cache_create("mm_struct",
+ mm_cachep = kmem_cache_create_usercopy("mm_struct",
sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
+ offsetof(struct mm_struct, saved_auxv),
+ sizeof_field(struct mm_struct, saved_auxv),
NULL);
vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT);
mmap_init();
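
[Note on the fork.c hunks] These changes are part of the hardened-usercopy whitelisting work: a cache created with kmem_cache_create_usercopy() declares, at creation time, the one region of its objects that may legally be copied to or from user space, and the hunks above apply that to the whole thread stack, the arch-defined thread_struct portion of task_struct, and mm_struct::saved_auxv. A minimal sketch of the pattern, using a hypothetical cache and field (not from this patch):

    #include <linux/init.h>
    #include <linux/slab.h>
    #include <linux/spinlock.h>

    /* hypothetical object: only 'name' is ever exposed to user space */
    struct demo_obj {
            spinlock_t lock;        /* kernel-internal, must never leak */
            char name[32];          /* read/written via copy_{to,from}_user() */
    };

    static struct kmem_cache *demo_cachep;

    static int __init demo_cache_init(void)
    {
            demo_cachep = kmem_cache_create_usercopy("demo_obj",
                            sizeof(struct demo_obj), 0, SLAB_PANIC,
                            offsetof(struct demo_obj, name),        /* useroffset */
                            sizeof_field(struct demo_obj, name),    /* usersize */
                            NULL);
            return demo_cachep ? 0 : -ENOMEM;
    }

A copy_to_user() sourced from obj->name then passes the hardened-usercopy bounds check, while one that strays into obj->lock is rejected.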
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 4e8089b319ae..8c82ea26e837 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -71,7 +71,7 @@ unsigned long probe_irq_on(void)
raw_spin_lock_irq(&desc->lock);
if (!desc->action && irq_settings_can_probe(desc)) {
desc->istate |= IRQS_AUTODETECT | IRQS_WAITING;
- if (irq_startup(desc, IRQ_NORESEND, IRQ_START_FORCE))
+ if (irq_activate_and_startup(desc, IRQ_NORESEND))
desc->istate |= IRQS_PENDING;
}
raw_spin_unlock_irq(&desc->lock);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 043bfc35b353..c69357a43849 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -294,11 +294,11 @@ int irq_activate(struct irq_desc *desc)
return 0;
}
-void irq_activate_and_startup(struct irq_desc *desc, bool resend)
+int irq_activate_and_startup(struct irq_desc *desc, bool resend)
{
if (WARN_ON(irq_activate(desc)))
- return;
- irq_startup(desc, resend, IRQ_START_FORCE);
+ return 0;
+ return irq_startup(desc, resend, IRQ_START_FORCE);
}
static void __irq_disable(struct irq_desc *desc, bool mask);
diff --git a/kernel/irq/debug.h b/kernel/irq/debug.h
index e4d3819a91cc..8ccb326d2977 100644
--- a/kernel/irq/debug.h
+++ b/kernel/irq/debug.h
@@ -3,8 +3,6 @@
* Debugging printout:
*/
-#include <linux/kallsyms.h>
-
#define ___P(f) if (desc->status_use_accessors & f) printk("%14s set\n", #f)
#define ___PS(f) if (desc->istate & f) printk("%14s set\n", #f)
/* FIXME */
@@ -19,14 +17,14 @@ static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc)
printk("irq %d, desc: %p, depth: %d, count: %d, unhandled: %d\n",
irq, desc, desc->depth, desc->irq_count, desc->irqs_unhandled);
- printk("->handle_irq(): %p, ", desc->handle_irq);
- print_symbol("%s\n", (unsigned long)desc->handle_irq);
- printk("->irq_data.chip(): %p, ", desc->irq_data.chip);
- print_symbol("%s\n", (unsigned long)desc->irq_data.chip);
+ printk("->handle_irq(): %p, %pS\n",
+ desc->handle_irq, desc->handle_irq);
+ printk("->irq_data.chip(): %p, %pS\n",
+ desc->irq_data.chip, desc->irq_data.chip);
printk("->action(): %p\n", desc->action);
if (desc->action) {
- printk("->action->handler(): %p, ", desc->action->handler);
- print_symbol("%s\n", (unsigned long)desc->action->handler);
+ printk("->action->handler(): %p, %pS\n",
+ desc->action->handler, desc->action->handler);
}
___P(IRQ_LEVEL);
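
[Note on the debug.h hunk] It drops the print_symbol() two-step in favour of the %pS printk extension, which resolves a code pointer to symbol+offset inline via kallsyms. A tiny example (some_handler is a stand-in for any function):

    void (*fn)(void) = some_handler;        /* any code pointer */
    pr_info("handler: %pS\n", fn);          /* e.g. "handler: some_handler+0x0/0x40" */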
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index ab19371eab9b..ca6afa267070 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -76,7 +76,7 @@ extern void __enable_irq(struct irq_desc *desc);
#define IRQ_START_COND false
extern int irq_activate(struct irq_desc *desc);
-extern void irq_activate_and_startup(struct irq_desc *desc, bool resend);
+extern int irq_activate_and_startup(struct irq_desc *desc, bool resend);
extern int irq_startup(struct irq_desc *desc, bool resend, bool force);
extern void irq_shutdown(struct irq_desc *desc);
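
[Note on the irq hunks] The autoprobe.c, chip.c, and internals.h changes are one logical fix: irq_activate_and_startup() now returns the irq_startup() result instead of void, so the legacy autoprobe path regains the information it used to get from calling irq_startup() directly.

    /* in probe_irq_on() (autoprobe.c hunk above):
     *
     *      if (irq_activate_and_startup(desc, IRQ_NORESEND))
     *              desc->istate |= IRQS_PENDING;
     *
     * a non-zero return means the interrupt was already pending when it
     * was started up; with the old void return that pending state was
     * silently dropped and probing could miss the line entirely.
     */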
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index d5fa4116688a..a23e21ada81b 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -12,7 +12,6 @@
* compression (see scripts/kallsyms.c for a more complete description)
*/
#include <linux/kallsyms.h>
-#include <linux/module.h>
#include <linux/init.h>
#include <linux/seq_file.h>
#include <linux/fs.h>
@@ -20,15 +19,12 @@
#include <linux/err.h>
#include <linux/proc_fs.h>
#include <linux/sched.h> /* for cond_resched */
-#include <linux/mm.h>
#include <linux/ctype.h>
#include <linux/slab.h>
#include <linux/filter.h>
#include <linux/ftrace.h>
#include <linux/compiler.h>
-#include <asm/sections.h>
-
/*
* These will be re-linked against their real values
* during the second link stage.
@@ -52,37 +48,6 @@ extern const u16 kallsyms_token_index[] __weak;
extern const unsigned long kallsyms_markers[] __weak;
-static inline int is_kernel_inittext(unsigned long addr)
-{
- if (addr >= (unsigned long)_sinittext
- && addr <= (unsigned long)_einittext)
- return 1;
- return 0;
-}
-
-static inline int is_kernel_text(unsigned long addr)
-{
- if ((addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) ||
- arch_is_kernel_text(addr))
- return 1;
- return in_gate_area_no_mm(addr);
-}
-
-static inline int is_kernel(unsigned long addr)
-{
- if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end)
- return 1;
- return in_gate_area_no_mm(addr);
-}
-
-static int is_ksym_addr(unsigned long addr)
-{
- if (IS_ENABLED(CONFIG_KALLSYMS_ALL))
- return is_kernel(addr);
-
- return is_kernel_text(addr) || is_kernel_inittext(addr);
-}
-
/*
* Expand a compressed symbol data into the resulting uncompressed string,
* if uncompressed string is too long (>= maxlen), it will be truncated,
@@ -464,17 +429,6 @@ int sprint_backtrace(char *buffer, unsigned long address)
return __sprint_symbol(buffer, address, -1, 1);
}
-/* Look up a kernel symbol and print it to the kernel messages. */
-void __print_symbol(const char *fmt, unsigned long address)
-{
- char buffer[KSYM_SYMBOL_LEN];
-
- sprint_symbol(buffer, address);
-
- printk(fmt, buffer);
-}
-EXPORT_SYMBOL(__print_symbol);
-
/* To avoid using get_symbol_offset for every symbol, we carry prefix along. */
struct kallsym_iter {
loff_t pos;
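
[Note on the kallsyms.c hunks] With __print_symbol() removed, address-to-name printing goes through the %pS/%ps printk specifiers, or through sprint_symbol() when a plain string buffer is needed. A short sketch, with addr being the address to resolve:

    #include <linux/kallsyms.h>

    char buf[KSYM_SYMBOL_LEN];

    sprint_symbol(buf, (unsigned long)addr);    /* "name+0x10/0x40 [module]" */
    pr_info("resolved: %s\n", buf);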
diff --git a/kernel/module.c b/kernel/module.c
index 1d65b2cc4f80..ccdf24c4949e 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3953,6 +3953,12 @@ static const char *get_ksymbol(struct module *mod,
return symname(kallsyms, best);
}
+void * __weak dereference_module_function_descriptor(struct module *mod,
+ void *ptr)
+{
+ return ptr;
+}
+
/* For kallsyms to ask for address resolution. NULL means not found. Careful
* not to lock to avoid deadlock on oopses, simply disable preemption. */
const char *module_address_lookup(unsigned long addr,
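
[Note on the module.c hunk] dereference_module_function_descriptor() gives architectures whose ABI calls through function descriptors (ia64, ppc64 ELFv1, parisc64) a hook to map a descriptor pointer onto the real entry address during symbol resolution; the weak default is the identity for everyone else. A hedged sketch of an override, assuming an ELFv1-style two-word descriptor (the struct is illustrative, not taken from any real port):

    struct demo_func_desc {
            unsigned long addr;     /* actual entry point */
            unsigned long toc;      /* global/TOC pointer */
    };

    void *dereference_module_function_descriptor(struct module *mod, void *ptr)
    {
            /* a real port first checks that ptr falls inside this module's
             * descriptor (.opd) section and returns ptr unchanged otherwise */
            return (void *)((struct demo_func_desc *)ptr)->addr;
    }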
diff --git a/kernel/power/power.h b/kernel/power/power.h
index f29cd178df90..9e58bdc8a562 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -104,9 +104,6 @@ extern int in_suspend;
extern dev_t swsusp_resume_device;
extern sector_t swsusp_resume_block;
-extern asmlinkage int swsusp_arch_suspend(void);
-extern asmlinkage int swsusp_arch_resume(void);
-
extern int create_basic_memory_bitmaps(void);
extern void free_basic_memory_bitmaps(void);
extern int hibernate_preallocate_memory(void);
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index c2e713f6ae2e..db4b9b8929eb 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -131,13 +131,10 @@ static int __init control_devkmsg(char *str)
/*
* Set sysctl string accordingly:
*/
- if (devkmsg_log == DEVKMSG_LOG_MASK_ON) {
- memset(devkmsg_log_str, 0, DEVKMSG_STR_MAX_SIZE);
- strncpy(devkmsg_log_str, "on", 2);
- } else if (devkmsg_log == DEVKMSG_LOG_MASK_OFF) {
- memset(devkmsg_log_str, 0, DEVKMSG_STR_MAX_SIZE);
- strncpy(devkmsg_log_str, "off", 3);
- }
+ if (devkmsg_log == DEVKMSG_LOG_MASK_ON)
+ strcpy(devkmsg_log_str, "on");
+ else if (devkmsg_log == DEVKMSG_LOG_MASK_OFF)
+ strcpy(devkmsg_log_str, "off");
/* else "ratelimit" which is set by default. */
/*
@@ -277,6 +274,13 @@ EXPORT_SYMBOL(console_set_on_cmdline);
/* Flag: console code may call schedule() */
static int console_may_schedule;
+enum con_msg_format_flags {
+ MSG_FORMAT_DEFAULT = 0,
+ MSG_FORMAT_SYSLOG = (1 << 0),
+};
+
+static int console_msg_format = MSG_FORMAT_DEFAULT;
+
/*
* The printk log buffer consists of a chain of concatenated variable
* length records. Every record starts with a record header, containing
@@ -1544,6 +1548,146 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
}
/*
+ * Special console_lock variants that help to reduce the risk of soft-lockups.
+ * They allow to pass console_lock to another printk() call using a busy wait.
+ */
+
+#ifdef CONFIG_LOCKDEP
+static struct lockdep_map console_owner_dep_map = {
+ .name = "console_owner"
+};
+#endif
+
+static DEFINE_RAW_SPINLOCK(console_owner_lock);
+static struct task_struct *console_owner;
+static bool console_waiter;
+
+/**
+ * console_lock_spinning_enable - mark beginning of code where another
+ * thread might safely busy wait
+ *
+ * This basically converts console_lock into a spinlock. This marks
+ * the section where the console_lock owner can not sleep, because
+ * there may be a waiter spinning (like a spinlock). Also it must be
+ * ready to hand over the lock at the end of the section.
+ */
+static void console_lock_spinning_enable(void)
+{
+ raw_spin_lock(&console_owner_lock);
+ console_owner = current;
+ raw_spin_unlock(&console_owner_lock);
+
+ /* The waiter may spin on us after setting console_owner */
+ spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_);
+}
+
+/**
+ * console_lock_spinning_disable_and_check - mark end of code where another
+ * thread was able to busy wait and check if there is a waiter
+ *
+ * This is called at the end of the section where spinning is allowed.
+ * It has two functions. First, it is a signal that it is no longer
+ * safe to start busy waiting for the lock. Second, it checks if
+ * there is a busy waiter and passes the lock rights to her.
+ *
+ * Important: Callers lose the lock if there was a busy waiter.
+ * They must not touch items synchronized by console_lock
+ * in this case.
+ *
+ * Return: 1 if the lock rights were passed, 0 otherwise.
+ */
+static int console_lock_spinning_disable_and_check(void)
+{
+ int waiter;
+
+ raw_spin_lock(&console_owner_lock);
+ waiter = READ_ONCE(console_waiter);
+ console_owner = NULL;
+ raw_spin_unlock(&console_owner_lock);
+
+ if (!waiter) {
+ spin_release(&console_owner_dep_map, 1, _THIS_IP_);
+ return 0;
+ }
+
+ /* The waiter is now free to continue */
+ WRITE_ONCE(console_waiter, false);
+
+ spin_release(&console_owner_dep_map, 1, _THIS_IP_);
+
+ /*
+ * Hand off console_lock to waiter. The waiter will perform
+ * the up(). After this, the waiter is the console_lock owner.
+ */
+ mutex_release(&console_lock_dep_map, 1, _THIS_IP_);
+ return 1;
+}
+
+/**
+ * console_trylock_spinning - try to get console_lock by busy waiting
+ *
+ * This allows to busy wait for the console_lock when the current
+ * owner is running in specially marked sections. It means that
+ * the current owner is running and cannot reschedule until it
+ * is ready to lose the lock.
+ *
+ * Return: 1 if we got the lock, 0 otherwise
+ */
+static int console_trylock_spinning(void)
+{
+ struct task_struct *owner = NULL;
+ bool waiter;
+ bool spin = false;
+ unsigned long flags;
+
+ if (console_trylock())
+ return 1;
+
+ printk_safe_enter_irqsave(flags);
+
+ raw_spin_lock(&console_owner_lock);
+ owner = READ_ONCE(console_owner);
+ waiter = READ_ONCE(console_waiter);
+ if (!waiter && owner && owner != current) {
+ WRITE_ONCE(console_waiter, true);
+ spin = true;
+ }
+ raw_spin_unlock(&console_owner_lock);
+
+ /*
+ * If there is an active printk() writing to the
+ * consoles, instead of having it write our data too,
+ * see if we can offload that load from the active
+ * printer, and do some printing ourselves.
+ * Go into a spin only if there isn't already a waiter
+ * spinning, and there is an active printer, and
+ * that active printer isn't us (recursive printk?).
+ */
+ if (!spin) {
+ printk_safe_exit_irqrestore(flags);
+ return 0;
+ }
+
+ /* We spin waiting for the owner to release us */
+ spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_);
+ /* Owner will clear console_waiter on hand off */
+ while (READ_ONCE(console_waiter))
+ cpu_relax();
+ spin_release(&console_owner_dep_map, 1, _THIS_IP_);
+
+ printk_safe_exit_irqrestore(flags);
+ /*
+ * The owner passed the console lock to us.
+ * Since we did not spin on console lock, annotate
+ * this as a trylock. Otherwise lockdep will
+ * complain.
+ */
+ mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_);
+
+ return 1;
+}
+
+/*
* Call the console drivers, asking them to write out
* log_buf[start] to log_buf[end - 1].
* The console_lock must be held.
@@ -1749,12 +1893,19 @@ asmlinkage int vprintk_emit(int facility, int level,
/* If called from the scheduler, we can not call up(). */
if (!in_sched) {
/*
+ * Disable preemption to avoid being preempted while holding
+ * console_sem which would prevent anyone from printing to
+ * console
+ */
+ preempt_disable();
+ /*
* Try to acquire and then immediately release the console
* semaphore. The release will print out buffers and wake up
* /dev/kmsg and syslog() users.
*/
- if (console_trylock())
+ if (console_trylock_spinning())
console_unlock();
+ preempt_enable();
}
return printed_len;
@@ -1855,6 +2006,8 @@ static ssize_t msg_print_ext_header(char *buf, size_t size,
static ssize_t msg_print_ext_body(char *buf, size_t size,
char *dict, size_t dict_len,
char *text, size_t text_len) { return 0; }
+static void console_lock_spinning_enable(void) { }
+static int console_lock_spinning_disable_and_check(void) { return 0; }
static void call_console_drivers(const char *ext_text, size_t ext_len,
const char *text, size_t len) {}
static size_t msg_print_text(const struct printk_log *msg,
@@ -1913,6 +2066,17 @@ static int __add_preferred_console(char *name, int idx, char *options,
c->index = idx;
return 0;
}
+
+static int __init console_msg_format_setup(char *str)
+{
+ if (!strcmp(str, "syslog"))
+ console_msg_format = MSG_FORMAT_SYSLOG;
+ if (!strcmp(str, "default"))
+ console_msg_format = MSG_FORMAT_DEFAULT;
+ return 1;
+}
+__setup("console_msg_format=", console_msg_format_setup);
+
/*
* Set up a console. Called via do_early_param() in init/main.c
* for each "console=" parameter in the boot command line.
@@ -2069,20 +2233,7 @@ int console_trylock(void)
return 0;
}
console_locked = 1;
- /*
- * When PREEMPT_COUNT disabled we can't reliably detect if it's
- * safe to schedule (e.g. calling printk while holding a spin_lock),
- * because preempt_disable()/preempt_enable() are just barriers there
- * and preempt_count() is always 0.
- *
- * RCU read sections have a separate preemption counter when
- * PREEMPT_RCU enabled thus we must take extra care and check
- * rcu_preempt_depth(), otherwise RCU read sections modify
- * preempt_count().
- */
- console_may_schedule = !oops_in_progress &&
- preemptible() &&
- !rcu_preempt_depth();
+ console_may_schedule = 0;
return 1;
}
EXPORT_SYMBOL(console_trylock);
@@ -2215,7 +2366,10 @@ skip:
goto skip;
}
- len += msg_print_text(msg, false, text + len, sizeof(text) - len);
+ len += msg_print_text(msg,
+ console_msg_format & MSG_FORMAT_SYSLOG,
+ text + len,
+ sizeof(text) - len);
if (nr_ext_console_drivers) {
ext_len = msg_print_ext_header(ext_text,
sizeof(ext_text),
@@ -2229,14 +2383,29 @@ skip:
console_seq++;
raw_spin_unlock(&logbuf_lock);
+ /*
+ * While actively printing out messages, if another printk()
+ * were to occur on another CPU, it may wait for this one to
+ * finish. This task can not be preempted if there is a
+ * waiter waiting to take over.
+ */
+ console_lock_spinning_enable();
+
stop_critical_timings(); /* don't trace print latency */
call_console_drivers(ext_text, ext_len, text, len);
start_critical_timings();
+
+ if (console_lock_spinning_disable_and_check()) {
+ printk_safe_exit_irqrestore(flags);
+ return;
+ }
+
printk_safe_exit_irqrestore(flags);
if (do_cond_resched)
cond_resched();
}
+
console_locked = 0;
/* Release the exclusive_console once it is used */
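
[Note on the printk.c hunks] Two changes here are user-visible. First, booting with console_msg_format=syslog makes console output carry syslog-style "<level>" prefixes (the format /dev/kmsg readers already parse), via the new console_msg_format flag passed to msg_print_text(). Second, the console_owner/console_waiter machinery lets a printk() on another CPU take over console flushing instead of piling all its output onto whichever CPU currently holds console_lock. A condensed sketch of the handoff (printk_safe bracketing and lockdep annotations elided):

    /*
     * CPU0 (owner, in console_unlock())     CPU1 (new printk())
     *
     * console_lock_spinning_enable();
     *   console_owner = current             console_trylock() fails;
     * call_console_drivers(...);            sees console_owner set, so:
     *                                         console_waiter = true;
     *                                         spin while console_waiter
     * console_lock_spinning_disable_and_check():
     *   waiter present -> clear it,
     *   hand over console_lock, return 1;
     * CPU0 returns at once and must not     spin ends: CPU1 is now the
     * touch console state any further       console_lock owner and keeps
     *                                       flushing the ring buffer
     */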
diff --git a/kernel/sched/autogroup.c b/kernel/sched/autogroup.c
index a43df5193538..bb4b9fe026a1 100644
--- a/kernel/sched/autogroup.c
+++ b/kernel/sched/autogroup.c
@@ -1,13 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
-#include "sched.h"
-
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
-#include <linux/kallsyms.h>
#include <linux/utsname.h>
#include <linux/security.h>
#include <linux/export.h>
+#include "sched.h"
+
unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
static struct autogroup autogroup_default;
static atomic_t autogroup_seq_nr;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 557d46728577..2fb4e27c636a 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1374,13 +1374,6 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {
- .procname = "hugepages_treat_as_movable",
- .data = &hugepages_treat_as_movable,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
{
.procname = "nr_overcommit_hugepages",
.data = NULL,
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 554b517c61a0..dabd9d167d42 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5015,7 +5015,6 @@ int ftrace_regex_release(struct inode *inode, struct file *file)
parser = &iter->parser;
if (trace_parser_loaded(parser)) {
- parser->buffer[parser->idx] = 0;
ftrace_match_records(iter->hash, parser->buffer, parser->idx);
}
@@ -5329,7 +5328,6 @@ ftrace_graph_release(struct inode *inode, struct file *file)
parser = &fgd->parser;
if (trace_parser_loaded((parser))) {
- parser->buffer[parser->idx] = 0;
ret = ftrace_graph_set_hash(fgd->new_hash,
parser->buffer);
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 32c069bbf41b..56608538a4ad 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -530,8 +530,6 @@ int trace_pid_write(struct trace_pid_list *filtered_pids,
ubuf += ret;
cnt -= ret;
- parser.buffer[parser.idx] = 0;
-
ret = -EINVAL;
if (kstrtoul(parser.buffer, 0, &val))
break;
@@ -1236,18 +1234,18 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
cnt--;
}
+ parser->idx = 0;
+
/* only spaces were written */
- if (isspace(ch)) {
+ if (isspace(ch) || !ch) {
*ppos += read;
ret = read;
goto out;
}
-
- parser->idx = 0;
}
/* read the non-space input */
- while (cnt && !isspace(ch)) {
+ while (cnt && !isspace(ch) && ch) {
if (parser->idx < parser->size - 1)
parser->buffer[parser->idx++] = ch;
else {
@@ -1262,12 +1260,14 @@ int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
}
/* We either got finished input or we have to wait for another call. */
- if (isspace(ch)) {
+ if (isspace(ch) || !ch) {
parser->buffer[parser->idx] = 0;
parser->cont = false;
} else if (parser->idx < parser->size - 1) {
parser->cont = true;
parser->buffer[parser->idx++] = ch;
+ /* Make sure the parsed string always terminates with '\0'. */
+ parser->buffer[parser->idx] = 0;
} else {
ret = -EINVAL;
goto out;
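
[Note on the trace.c hunks] trace_get_user() is now responsible for NUL-terminating the parser buffer in every case, including the continuation path, and it treats an embedded '\0' in the input like whitespace; that is why the ftrace.c, trace.c, and trace_events.c callers in this diff all drop their manual parser->buffer[parser->idx] = 0 writes. Callers may now use the buffer as a C string directly; a sketch of the simplified pattern:

    ret = trace_get_user(&parser, ubuf, cnt, ppos);
    if (ret >= 0 && trace_parser_loaded(&parser)) {
            unsigned long val;

            /* parser.buffer is guaranteed NUL-terminated here */
            if (kstrtoul(parser.buffer, 0, &val))
                    return -EINVAL;
    }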
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 1b87157edbff..05c7172c6667 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -885,8 +885,6 @@ ftrace_event_write(struct file *file, const char __user *ubuf,
if (*parser.buffer == '!')
set = 0;
- parser.buffer[parser.idx] = 0;
-
ret = ftrace_set_clr_event(tr, parser.buffer + !set, set);
if (ret)
goto out_put;
diff --git a/kernel/trace/trace_selftest_dynamic.c b/kernel/trace/trace_selftest_dynamic.c
index 8cda06a10d66..c364cf777e1a 100644
--- a/kernel/trace/trace_selftest_dynamic.c
+++ b/kernel/trace/trace_selftest_dynamic.c
@@ -1,13 +1,14 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
#include "trace.h"
-int DYN_FTRACE_TEST_NAME(void)
+noinline __noclone int DYN_FTRACE_TEST_NAME(void)
{
/* used to call mcount */
return 0;
}
-int DYN_FTRACE_TEST_NAME2(void)
+noinline __noclone int DYN_FTRACE_TEST_NAME2(void)
{
/* used to call mcount */
return 0;
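
[Note on the trace_selftest_dynamic.c hunk] noinline alone does not pin these functions down: at -O2, gcc may still emit optimized clones under mangled names, or constant-propagate the call away, and the selftest then fails to find the symbol it set a filter on. __noclone (from <linux/compiler.h>) closes that hole:

    /* without __noclone, gcc is free to emit e.g.
     *   DYN_FTRACE_TEST_NAME.constprop.0
     * and the kallsyms lookup of "DYN_FTRACE_TEST_NAME" done by the
     * ftrace selftest no longer matches the code actually called */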
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 40592e7b3568..268029ae1be6 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -608,7 +608,7 @@ static int probes_seq_show(struct seq_file *m, void *v)
/* Don't print "0x (null)" when offset is 0 */
if (tu->offset) {
- seq_printf(m, "0x%p", (void *)tu->offset);
+ seq_printf(m, "0x%px", (void *)tu->offset);
} else {
switch (sizeof(void *)) {
case 4:
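
[Note on the trace_uprobe.c hunk] Since %p output is hashed to avoid leaking kernel addresses, printing the uprobe offset with a bare %p shows a meaningless per-boot value; %px deliberately prints the raw number, which is safe here because the offset is a file offset, not a kernel pointer. Illustrative difference:

    void *off = (void *)0x4a0UL;    /* a file offset cast to a pointer */

    pr_info("0x%p\n", off);         /* e.g. "0x00000000f59ec44c" - hashed */
    pr_info("0x%px\n", off);        /* "0x00000000000004a0" - the real value */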
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 8dd2e66e8383..017044c26233 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3807,6 +3807,7 @@ int apply_workqueue_attrs(struct workqueue_struct *wq,
return ret;
}
+EXPORT_SYMBOL_GPL(apply_workqueue_attrs);
/**
* wq_update_unbound_numa - update NUMA affinity of a wq for CPU hot[un]plug
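
[Note on the workqueue.c hunk] Exporting apply_workqueue_attrs() lets GPL modules retune unbound workqueues at runtime. A minimal sketch, assuming the caller owns a WQ_UNBOUND workqueue (error handling trimmed; alloc_workqueue_attrs() still takes a gfp argument in this era):

    #include <linux/topology.h>
    #include <linux/workqueue.h>

    static int pin_wq_to_node(struct workqueue_struct *wq, int node)
    {
            struct workqueue_attrs *attrs;
            int ret;

            attrs = alloc_workqueue_attrs(GFP_KERNEL);
            if (!attrs)
                    return -ENOMEM;

            cpumask_copy(attrs->cpumask, cpumask_of_node(node));
            ret = apply_workqueue_attrs(wq, attrs); /* now EXPORT_SYMBOL_GPL */

            free_workqueue_attrs(attrs);
            return ret;
    }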