From c7d8b7824ff9de866a356e1892dbe9f191aa5d06 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe
Date: Tue, 6 Aug 2019 20:15:42 -0300
Subject: hmm: use mmu_notifier_get/put for 'struct hmm'

This is a significant simplification: it eliminates all the remaining
'hmm' stuff in mm_struct, eliminates krefing along the critical notifier
paths, and takes away all the ugly locking and abuse of page_table_lock.

mmu_notifier_get() provides the single struct hmm per struct mm, which
eliminates mm->hmm. It also directly guarantees that no mmu_notifier op
callback is callable while a concurrent free is possible, which
eliminates all the krefs inside the mmu_notifier callbacks.

The remaining krefs in the range code were overly cautious; drivers are
already not permitted to free the mirror while a range exists.

Link: https://lore.kernel.org/r/20190806231548.25242-6-jgg@ziepe.ca
Reviewed-by: Christoph Hellwig
Reviewed-by: Ralph Campbell
Tested-by: Ralph Campbell
Signed-off-by: Jason Gunthorpe
---
 kernel/fork.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index d8ae0f1b4148..bd4a0762f12f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1007,7 +1007,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	mm_init_owner(mm, p);
 	RCU_INIT_POINTER(mm->exe_file, NULL);
 	mmu_notifier_mm_init(mm);
-	hmm_mm_init(mm);
 	init_tlb_flush_pending(mm);
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
 	mm->pmd_huge_pte = NULL;
--
cgit v1.2.3-59-g8ed1b
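As a consumer-side sketch (hypothetical driver code, not part of this patch):
with the get/put scheme the per-mm notifier is obtained and released roughly
as below, assuming ops that supply the alloc_notifier/free_notifier hooks.
The names my_mirror, my_attach and my_detach are illustrative.

    #include <linux/err.h>
    #include <linux/mmu_notifier.h>
    #include <linux/slab.h>

    struct my_mirror {
    	struct mmu_notifier mn;
    };

    static struct mmu_notifier *my_alloc_notifier(struct mm_struct *mm)
    {
    	struct my_mirror *m = kzalloc(sizeof(*m), GFP_KERNEL);

    	return m ? &m->mn : ERR_PTR(-ENOMEM);
    }

    static void my_free_notifier(struct mmu_notifier *mn)
    {
    	kfree(container_of(mn, struct my_mirror, mn));
    }

    static const struct mmu_notifier_ops my_ops = {
    	.alloc_notifier = my_alloc_notifier,
    	.free_notifier = my_free_notifier,
    };

    static int my_attach(struct mm_struct *mm)
    {
    	/* Returns the single notifier for this mm, creating it on first use. */
    	struct mmu_notifier *mn = mmu_notifier_get(&my_ops, mm);

    	return IS_ERR(mn) ? PTR_ERR(mn) : 0;
    }

    static void my_detach(struct mmu_notifier *mn)
    {
    	/* After the final put, the ops can no longer be invoked. */
    	mmu_notifier_put(mn);
    }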
From 0c385190392d8c7128fb7517b3c676e19c7b8808 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Sun, 18 Aug 2019 11:05:54 +0200
Subject: resource: add a not device managed request_free_mem_region variant

Factor out the guts of devm_request_free_mem_region so that we can
implement both a device managed and a manually released version as tiny
wrappers around it.

Link: https://lore.kernel.org/r/20190818090557.17853-2-hch@lst.de
Signed-off-by: Christoph Hellwig
Reviewed-by: Ira Weiny
Reviewed-by: Dan Williams
Tested-by: Bharata B Rao
Reviewed-by: Jason Gunthorpe
Signed-off-by: Jason Gunthorpe
---
 include/linux/ioport.h |  2 ++
 kernel/resource.c      | 45 +++++++++++++++++++++++++++++++--------------
 2 files changed, 33 insertions(+), 14 deletions(-)

diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 5b6a7121c9f0..7bddddfc76d6 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -297,6 +297,8 @@ static inline bool resource_overlaps(struct resource *r1, struct resource *r2)
 
 struct resource *devm_request_free_mem_region(struct device *dev,
 		struct resource *base, unsigned long size);
+struct resource *request_free_mem_region(struct resource *base,
+		unsigned long size, const char *name);
 
 #endif /* __ASSEMBLY__ */
 #endif /* _LINUX_IOPORT_H */
diff --git a/kernel/resource.c b/kernel/resource.c
index 7ea4306503c5..74877e9d90ca 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -1644,19 +1644,8 @@ void resource_list_free(struct list_head *head)
 EXPORT_SYMBOL(resource_list_free);
 
 #ifdef CONFIG_DEVICE_PRIVATE
-/**
- * devm_request_free_mem_region - find free region for device private memory
- *
- * @dev: device struct to bind the resource to
- * @size: size in bytes of the device memory to add
- * @base: resource tree to look in
- *
- * This function tries to find an empty range of physical address big enough to
- * contain the new resource, so that it can later be hotplugged as ZONE_DEVICE
- * memory, which in turn allocates struct pages.
- */
-struct resource *devm_request_free_mem_region(struct device *dev,
-		struct resource *base, unsigned long size)
+static struct resource *__request_free_mem_region(struct device *dev,
+		struct resource *base, unsigned long size, const char *name)
 {
 	resource_size_t end, addr;
 	struct resource *res;
@@ -1670,7 +1659,10 @@ struct resource *devm_request_free_mem_region(struct device *dev,
 			REGION_DISJOINT)
 			continue;
 
-		res = devm_request_mem_region(dev, addr, size, dev_name(dev));
+		if (dev)
+			res = devm_request_mem_region(dev, addr, size, name);
+		else
+			res = request_mem_region(addr, size, name);
 		if (!res)
 			return ERR_PTR(-ENOMEM);
 		res->desc = IORES_DESC_DEVICE_PRIVATE_MEMORY;
@@ -1679,7 +1671,32 @@ struct resource *devm_request_free_mem_region(struct device *dev,
 
 	return ERR_PTR(-ERANGE);
 }
+
+/**
+ * devm_request_free_mem_region - find free region for device private memory
+ *
+ * @dev: device struct to bind the resource to
+ * @size: size in bytes of the device memory to add
+ * @base: resource tree to look in
+ *
+ * This function tries to find an empty range of physical address big enough to
+ * contain the new resource, so that it can later be hotplugged as ZONE_DEVICE
+ * memory, which in turn allocates struct pages.
+ */
+struct resource *devm_request_free_mem_region(struct device *dev,
+		struct resource *base, unsigned long size)
+{
+	return __request_free_mem_region(dev, base, size, dev_name(dev));
+}
 EXPORT_SYMBOL_GPL(devm_request_free_mem_region);
+
+struct resource *request_free_mem_region(struct resource *base,
+		unsigned long size, const char *name)
+{
+	return __request_free_mem_region(NULL, base, size, name);
+}
+EXPORT_SYMBOL_GPL(request_free_mem_region);
+
 #endif /* CONFIG_DEVICE_PRIVATE */
 
 static int __init strict_iomem(char *str)
--
cgit v1.2.3-59-g8ed1b
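A quick sketch of the new non-devm variant in use (illustrative only; the
pool name and the my_pool_* helpers are made up, the real consumer arrives
later in this series):

    #include <linux/err.h>
    #include <linux/ioport.h>

    static struct resource *my_region;

    static int my_pool_init(unsigned long size)
    {
    	/* Find a free physical range in the iomem tree; no struct device. */
    	my_region = request_free_mem_region(&iomem_resource, size, "my-pool");
    	if (IS_ERR(my_region))
    		return PTR_ERR(my_region);
    	return 0;
    }

    static void my_pool_exit(void)
    {
    	/* Manual release; the devm variant would do this automatically. */
    	release_mem_region(my_region->start, resource_size(my_region));
    }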
From fdc029b19dfd190840d23e5c70bd231140b0e4a4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Sun, 18 Aug 2019 11:05:55 +0200
Subject: memremap: remove the dev field in struct dev_pagemap

The dev field in struct dev_pagemap is only used to print dev_name in
two places, which are at best nice to have. Just remove the field and
thus the name in those two messages.

Link: https://lore.kernel.org/r/20190818090557.17853-3-hch@lst.de
Signed-off-by: Christoph Hellwig
Reviewed-by: Ira Weiny
Reviewed-by: Dan Williams
Tested-by: Bharata B Rao
Reviewed-by: Jason Gunthorpe
Signed-off-by: Jason Gunthorpe
---
 include/linux/memremap.h          | 1 -
 kernel/memremap.c                 | 6 +-----
 mm/page_alloc.c                   | 2 +-
 tools/testing/nvdimm/test/iomap.c | 1 -
 4 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index f8a5b2a19945..8f0013e18e14 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -109,7 +109,6 @@ struct dev_pagemap {
 	struct percpu_ref *ref;
 	struct percpu_ref internal_ref;
 	struct completion done;
-	struct device *dev;
 	enum memory_type type;
 	unsigned int flags;
 	u64 pci_p2pdma_bus_offset;
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 6ee03a816d67..600a14cbe663 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -96,7 +96,6 @@ static void dev_pagemap_cleanup(struct dev_pagemap *pgmap)
 static void devm_memremap_pages_release(void *data)
 {
 	struct dev_pagemap *pgmap = data;
-	struct device *dev = pgmap->dev;
 	struct resource *res = &pgmap->res;
 	unsigned long pfn;
 	int nid;
@@ -123,8 +122,7 @@ static void devm_memremap_pages_release(void *data)
 	untrack_pfn(NULL, PHYS_PFN(res->start), resource_size(res));
 
 	pgmap_array_delete(res);
-	dev_WARN_ONCE(dev, pgmap->altmap.alloc,
-		      "%s: failed to free all reserved pages\n", __func__);
+	WARN_ONCE(pgmap->altmap.alloc, "failed to free all reserved pages\n");
 }
 
 static void dev_pagemap_percpu_release(struct percpu_ref *ref)
@@ -245,8 +243,6 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 		goto err_array;
 	}
 
-	pgmap->dev = dev;
-
 	error = xa_err(xa_store_range(&pgmap_array, PHYS_PFN(res->start),
 				PHYS_PFN(res->end), pgmap, GFP_KERNEL));
 	if (error)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 272c6de1bf4e..b39baa2b1faf 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5982,7 +5982,7 @@ void __ref memmap_init_zone_device(struct zone *zone,
 		}
 	}
 
-	pr_info("%s initialised, %lu pages in %ums\n", dev_name(pgmap->dev),
+	pr_info("%s initialised %lu pages in %ums\n", __func__,
 		size, jiffies_to_msecs(jiffies - start));
 }
 
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index cd040b5abffe..3f55f2f99112 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -132,7 +132,6 @@ void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 	if (!nfit_res)
 		return devm_memremap_pages(dev, pgmap);
 
-	pgmap->dev = dev;
 	if (!pgmap->ref) {
 		if (pgmap->ops && (pgmap->ops->kill || pgmap->ops->cleanup))
 			return ERR_PTR(-EINVAL);
--
cgit v1.2.3-59-g8ed1b
From 6f42193fd86eb5eb518984032c85e9b5582b7625 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Sun, 18 Aug 2019 11:05:56 +0200
Subject: memremap: don't use a separate devm action for devmap_managed_enable_get

Just clean up early failures directly and then piggyback on
devm_memremap_pages_release. This helps with a pending not device
managed version of devm_memremap_pages.

Link: https://lore.kernel.org/r/20190818090557.17853-4-hch@lst.de
Signed-off-by: Christoph Hellwig
Reviewed-by: Ira Weiny
Reviewed-by: Dan Williams
Tested-by: Bharata B Rao
Reviewed-by: Jason Gunthorpe
Signed-off-by: Jason Gunthorpe
---
 kernel/memremap.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/kernel/memremap.c b/kernel/memremap.c
index 600a14cbe663..09a087ca30ff 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -21,13 +21,13 @@ DEFINE_STATIC_KEY_FALSE(devmap_managed_key);
 EXPORT_SYMBOL(devmap_managed_key);
 static atomic_t devmap_managed_enable;
 
-static void devmap_managed_enable_put(void *data)
+static void devmap_managed_enable_put(void)
 {
 	if (atomic_dec_and_test(&devmap_managed_enable))
 		static_branch_disable(&devmap_managed_key);
 }
 
-static int devmap_managed_enable_get(struct device *dev, struct dev_pagemap *pgmap)
+static int devmap_managed_enable_get(struct dev_pagemap *pgmap)
 {
 	if (!pgmap->ops || !pgmap->ops->page_free) {
 		WARN(1, "Missing page_free method\n");
@@ -36,13 +36,16 @@ static int devmap_managed_enable_get(struct device *dev, struct dev_pagemap *pgm
 
 	if (atomic_inc_return(&devmap_managed_enable) == 1)
 		static_branch_enable(&devmap_managed_key);
-	return devm_add_action_or_reset(dev, devmap_managed_enable_put, NULL);
+	return 0;
 }
 #else
-static int devmap_managed_enable_get(struct device *dev, struct dev_pagemap *pgmap)
+static int devmap_managed_enable_get(struct dev_pagemap *pgmap)
 {
 	return -EINVAL;
 }
+static void devmap_managed_enable_put(void)
+{
+}
 #endif /* CONFIG_DEV_PAGEMAP_OPS */
 
 static void pgmap_array_delete(struct resource *res)
@@ -123,6 +126,7 @@ static void devm_memremap_pages_release(void *data)
 	untrack_pfn(NULL, PHYS_PFN(res->start), resource_size(res));
 	pgmap_array_delete(res);
 	WARN_ONCE(pgmap->altmap.alloc, "failed to free all reserved pages\n");
+	devmap_managed_enable_put();
 }
 
 static void dev_pagemap_percpu_release(struct percpu_ref *ref)
@@ -212,7 +216,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 	}
 
 	if (need_devmap_managed) {
-		error = devmap_managed_enable_get(dev, pgmap);
+		error = devmap_managed_enable_get(pgmap);
 		if (error)
 			return ERR_PTR(error);
 	}
@@ -321,6 +325,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
  err_array:
 	dev_pagemap_kill(pgmap);
 	dev_pagemap_cleanup(pgmap);
+	devmap_managed_enable_put();
 	return ERR_PTR(error);
 }
 EXPORT_SYMBOL_GPL(devm_memremap_pages);
--
cgit v1.2.3-59-g8ed1b
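For reference, the enable/put pair above is the usual refcounted
static-branch idiom; a standalone sketch of that pattern (illustrative
names, not code from this patch):

    #include <linux/atomic.h>
    #include <linux/jump_label.h>

    DEFINE_STATIC_KEY_FALSE(my_key);
    static atomic_t my_enable_count;

    static void my_feature_get(void)
    {
    	/* First user flips the static branch on. */
    	if (atomic_inc_return(&my_enable_count) == 1)
    		static_branch_enable(&my_key);
    }

    static void my_feature_put(void)
    {
    	/* Last user flips it back off. */
    	if (atomic_dec_and_test(&my_enable_count))
    		static_branch_disable(&my_key);
    }

After this change, every successful devmap_managed_enable_get() is balanced
by exactly one devmap_managed_enable_put(), either on the error path or in
devm_memremap_pages_release().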
From 6869b7b206595ae0e326f59719090351eb8f4f5d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Sun, 18 Aug 2019 11:05:57 +0200
Subject: memremap: provide a not device managed memremap_pages

The kvmppc ultravisor code wants a device private memory pool that is
system wide and not attached to a device. Instead of faking one up,
provide a low-level memremap_pages for it.

Link: https://lore.kernel.org/r/20190818090557.17853-5-hch@lst.de
Signed-off-by: Christoph Hellwig
Reviewed-by: Ira Weiny
Reviewed-by: Dan Williams
Tested-by: Bharata B Rao
Reviewed-by: Jason Gunthorpe
Signed-off-by: Jason Gunthorpe
---
 include/linux/memremap.h |  2 ++
 kernel/memremap.c        | 84 ++++++++++++++++++++++++++++++------------------
 2 files changed, 54 insertions(+), 32 deletions(-)

diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 8f0013e18e14..fb2a0bd826b9 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -123,6 +123,8 @@ static inline struct vmem_altmap *pgmap_altmap(struct dev_pagemap *pgmap)
 }
 
 #ifdef CONFIG_ZONE_DEVICE
+void *memremap_pages(struct dev_pagemap *pgmap, int nid);
+void memunmap_pages(struct dev_pagemap *pgmap);
 void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap);
 void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap);
 struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 09a087ca30ff..77a77704eb28 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -96,9 +96,8 @@ static void dev_pagemap_cleanup(struct dev_pagemap *pgmap)
 	}
 }
 
-static void devm_memremap_pages_release(void *data)
+void memunmap_pages(struct dev_pagemap *pgmap)
 {
-	struct dev_pagemap *pgmap = data;
 	struct resource *res = &pgmap->res;
 	unsigned long pfn;
 	int nid;
@@ -128,6 +127,12 @@ static void devm_memremap_pages_release(void *data)
 	WARN_ONCE(pgmap->altmap.alloc, "failed to free all reserved pages\n");
 	devmap_managed_enable_put();
 }
+EXPORT_SYMBOL_GPL(memunmap_pages);
+
+static void devm_memremap_pages_release(void *data)
+{
+	memunmap_pages(data);
+}
 
 static void dev_pagemap_percpu_release(struct percpu_ref *ref)
 {
@@ -137,27 +142,12 @@ static void dev_pagemap_percpu_release(struct percpu_ref *ref)
 	complete(&pgmap->done);
 }
 
-/**
- * devm_memremap_pages - remap and provide memmap backing for the given resource
- * @dev: hosting device for @res
- * @pgmap: pointer to a struct dev_pagemap
- *
- * Notes:
- * 1/ At a minimum the res and type members of @pgmap must be initialized
- *    by the caller before passing it to this function
- *
- * 2/ The altmap field may optionally be initialized, in which case
- *    PGMAP_ALTMAP_VALID must be set in pgmap->flags.
- *
- * 3/ The ref field may optionally be provided, in which pgmap->ref must be
- *    'live' on entry and will be killed and reaped at
- *    devm_memremap_pages_release() time, or if this routine fails.
- *
- * 4/ res is expected to be a host memory range that could feasibly be
- *    treated as a "System RAM" range, i.e. not a device mmio range, but
- *    this is not enforced.
+/*
+ * Not device managed version of devm_memremap_pages, undone by
+ * memunmap_pages().  Please use devm_memremap_pages if you have a struct
+ * device available.
  */
-void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
+void *memremap_pages(struct dev_pagemap *pgmap, int nid)
 {
 	struct resource *res = &pgmap->res;
 	struct dev_pagemap *conflict_pgmap;
@@ -168,7 +158,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 		.altmap = pgmap_altmap(pgmap),
 	};
 	pgprot_t pgprot = PAGE_KERNEL;
-	int error, nid, is_ram;
+	int error, is_ram;
 	bool need_devmap_managed = true;
 
 	switch (pgmap->type) {
@@ -223,7 +213,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 
 	conflict_pgmap = get_dev_pagemap(PHYS_PFN(res->start), NULL);
 	if (conflict_pgmap) {
-		dev_WARN(dev, "Conflicting mapping in same section\n");
+		WARN(1, "Conflicting mapping in same section\n");
 		put_dev_pagemap(conflict_pgmap);
 		error = -ENOMEM;
 		goto err_array;
@@ -231,7 +221,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 
 	conflict_pgmap = get_dev_pagemap(PHYS_PFN(res->end), NULL);
 	if (conflict_pgmap) {
-		dev_WARN(dev, "Conflicting mapping in same section\n");
+		WARN(1, "Conflicting mapping in same section\n");
 		put_dev_pagemap(conflict_pgmap);
 		error = -ENOMEM;
 		goto err_array;
@@ -252,7 +242,6 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 	if (error)
 		goto err_array;
 
-	nid = dev_to_node(dev);
 	if (nid < 0)
 		nid = numa_mem_id();
 
@@ -308,12 +297,6 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 				PHYS_PFN(res->start),
 				PHYS_PFN(resource_size(res)), pgmap);
 	percpu_ref_get_many(pgmap->ref, pfn_end(pgmap) - pfn_first(pgmap));
-
-	error = devm_add_action_or_reset(dev, devm_memremap_pages_release,
-			pgmap);
-	if (error)
-		return ERR_PTR(error);
-
 	return __va(res->start);
 
  err_add_memory:
@@ -328,6 +311,43 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 	devmap_managed_enable_put();
 	return ERR_PTR(error);
 }
+EXPORT_SYMBOL_GPL(memremap_pages);
+
+/**
+ * devm_memremap_pages - remap and provide memmap backing for the given resource
+ * @dev: hosting device for @res
+ * @pgmap: pointer to a struct dev_pagemap
+ *
+ * Notes:
+ * 1/ At a minimum the res and type members of @pgmap must be initialized
+ *    by the caller before passing it to this function
+ *
+ * 2/ The altmap field may optionally be initialized, in which case
+ *    PGMAP_ALTMAP_VALID must be set in pgmap->flags.
+ *
+ * 3/ The ref field may optionally be provided, in which case pgmap->ref
+ *    must be 'live' on entry and will be killed and reaped at
+ *    devm_memremap_pages_release() time, or if this routine fails.
+ *
+ * 4/ res is expected to be a host memory range that could feasibly be
+ *    treated as a "System RAM" range, i.e. not a device mmio range, but
+ *    this is not enforced.
+ */
+void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
+{
+	int error;
+	void *ret;
+
+	ret = memremap_pages(pgmap, dev_to_node(dev));
+	if (IS_ERR(ret))
+		return ret;
+
+	error = devm_add_action_or_reset(dev, devm_memremap_pages_release,
+			pgmap);
+	if (error)
+		return ERR_PTR(error);
+	return ret;
+}
 EXPORT_SYMBOL_GPL(devm_memremap_pages);
 
 void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap)
--
cgit v1.2.3-59-g8ed1b
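A sketch of the deviceless usage this enables (hypothetical caller; the
my_* names are illustrative, the real kvmppc user lands separately). Per
the code above, MEMORY_DEVICE_PRIVATE needs both migrate_to_ram and
page_free ops, and nid < 0 falls back to numa_mem_id():

    #include <linux/memremap.h>
    #include <linux/mm.h>
    #include <linux/numa.h>

    static vm_fault_t my_migrate_to_ram(struct vm_fault *vmf)
    {
    	/* Migrate the faulting page back to system RAM here. */
    	return VM_FAULT_SIGBUS;		/* placeholder */
    }

    static void my_page_free(struct page *page)
    {
    	/* Return the backing page to the driver's free list. */
    }

    static const struct dev_pagemap_ops my_pgmap_ops = {
    	.migrate_to_ram	= my_migrate_to_ram,
    	.page_free	= my_page_free,
    };

    static void *my_map(struct dev_pagemap *pgmap, struct resource *res)
    {
    	pgmap->res = *res;	/* e.g. from request_free_mem_region() */
    	pgmap->type = MEMORY_DEVICE_PRIVATE;
    	pgmap->ops = &my_pgmap_ops;

    	/* No struct device: NUMA_NO_NODE falls back to numa_mem_id(). */
    	return memremap_pages(pgmap, NUMA_NO_NODE);
    }

    static void my_unmap(struct dev_pagemap *pgmap)
    {
    	memunmap_pages(pgmap);	/* undoes memremap_pages() */
    }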
From 312364f3534cc974b79a96d062bde2386315201f Mon Sep 17 00:00:00 2001
From: Daniel Vetter
Date: Mon, 26 Aug 2019 22:14:23 +0200
Subject: kernel.h: Add non_block_start/end()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In some special cases we must not block, but there is no spinlock,
preempt-off, irqs-off or similar critical section already that arms the
might_sleep() debug checks. Add a non_block_start/end() pair to annotate
these.

This will be used in the oom paths of mmu-notifiers, where blocking is
not allowed to make sure there's forward progress. Quoting Michal:

"The notifier is called from quite a restricted context - oom_reaper -
which shouldn't depend on any locks or sleepable conditionals. The code
should be swift as well but we mostly do care about it to make a forward
progress. Checking for sleepable context is the best thing we could come
up with that would describe these demands at least partially."

Peter also asked whether we want to catch spinlocks on top, but Michal
said those are less of a problem because spinlocks can't have an
indirect dependency upon the page allocator and hence close the loop
with the oom reaper.

Suggested by Michal Hocko.

Link: https://lore.kernel.org/r/20190826201425.17547-4-daniel.vetter@ffwll.ch
Acked-by: Christian König (v1)
Acked-by: Peter Zijlstra (Intel)
Signed-off-by: Daniel Vetter
Acked-by: Michal Hocko
Signed-off-by: Jason Gunthorpe
---
 include/linux/kernel.h | 23 ++++++++++++++++++++++-
 include/linux/sched.h  |  4 ++++
 kernel/sched/core.c    | 19 ++++++++++++++-----
 3 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 4fa360a13c1e..d83d403dac2e 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -217,7 +217,9 @@ extern void __cant_sleep(const char *file, int line, int preempt_offset);
  * might_sleep - annotation for functions that can sleep
  *
  * this macro will print a stack trace if it is executed in an atomic
- * context (spinlock, irq-handler, ...).
+ * context (spinlock, irq-handler, ...). Additional sections where blocking is
+ * not allowed can be annotated with non_block_start() and non_block_end()
+ * pairs.
  *
  * This is a useful debugging help to be able to catch problems early and not
  * be bitten later when the calling function happens to sleep when it is not
@@ -233,6 +235,23 @@ extern void __cant_sleep(const char *file, int line, int preempt_offset);
 # define cant_sleep() \
 	do { __cant_sleep(__FILE__, __LINE__, 0); } while (0)
 # define sched_annotate_sleep()	(current->task_state_change = 0)
+/**
+ * non_block_start - annotate the start of section where sleeping is prohibited
+ *
+ * This is on behalf of the oom reaper, specifically when it is calling the mmu
+ * notifiers. The problem is that if the notifier were to block on, for example,
+ * mutex_lock() and if the process which holds that mutex were to perform a
+ * sleeping memory allocation, the oom reaper is now blocked on completion of
+ * that memory allocation. Other blocking calls like wait_event() pose similar
+ * issues.
+ */
+# define non_block_start() (current->non_block_count++)
+/**
+ * non_block_end - annotate the end of section where sleeping is prohibited
+ *
+ * Closes a section opened by non_block_start().
+ */
+# define non_block_end() WARN_ON(current->non_block_count-- == 0)
 #else
   static inline void ___might_sleep(const char *file, int line,
 				   int preempt_offset) { }
@@ -241,6 +260,8 @@ extern void __cant_sleep(const char *file, int line, int preempt_offset);
 # define might_sleep() do { might_resched(); } while (0)
 # define cant_sleep() do { } while (0)
 # define sched_annotate_sleep() do { } while (0)
+# define non_block_start() do { } while (0)
+# define non_block_end() do { } while (0)
 #endif
 
 #define might_sleep_if(cond) do { if (cond) might_sleep(); } while (0)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9f51932bd543..c5630f3dca1f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -974,6 +974,10 @@ struct task_struct {
 	struct mutex_waiter		*blocked_on;
 #endif
 
+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+	int				non_block_count;
+#endif
+
 #ifdef CONFIG_TRACE_IRQFLAGS
 	unsigned int			irq_events;
 	unsigned long			hardirq_enable_ip;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2b037f195473..57245770d6cc 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3700,13 +3700,22 @@ static noinline void __schedule_bug(struct task_struct *prev)
 /*
  * Various schedule()-time debugging checks and statistics:
  */
-static inline void schedule_debug(struct task_struct *prev)
+static inline void schedule_debug(struct task_struct *prev, bool preempt)
 {
 #ifdef CONFIG_SCHED_STACK_END_CHECK
 	if (task_stack_end_corrupted(prev))
 		panic("corrupted stack end detected inside scheduler\n");
 #endif
 
+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+	if (!preempt && prev->state && prev->non_block_count) {
+		printk(KERN_ERR "BUG: scheduling in a non-blocking section: %s/%d/%i\n",
+			prev->comm, prev->pid, prev->non_block_count);
+		dump_stack();
+		add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
+	}
+#endif
+
 	if (unlikely(in_atomic_preempt_off())) {
 		__schedule_bug(prev);
 		preempt_count_set(PREEMPT_DISABLED);
@@ -3813,7 +3822,7 @@ static void __sched notrace __schedule(bool preempt)
 	rq = cpu_rq(cpu);
 	prev = rq->curr;
 
-	schedule_debug(prev);
+	schedule_debug(prev, preempt);
 
 	if (sched_feat(HRTICK))
 		hrtick_clear(rq);
@@ -6570,7 +6579,7 @@ void ___might_sleep(const char *file, int line, int preempt_offset)
 	rcu_sleep_check();
 
 	if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
-	     !is_idle_task(current)) ||
+	     !is_idle_task(current) && !current->non_block_count) ||
 	    system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING ||
 	    oops_in_progress)
 		return;
@@ -6586,8 +6595,8 @@ void ___might_sleep(const char *file, int line, int preempt_offset)
 		"BUG: sleeping function called from invalid context at %s:%d\n",
 			file, line);
 	printk(KERN_ERR
-		"in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n",
-			in_atomic(), irqs_disabled(),
+		"in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n",
+			in_atomic(), irqs_disabled(), current->non_block_count,
 			current->pid, current->comm);
 
 	if (task_stack_end_corrupted(current))
--
cgit v1.2.3-59-g8ed1b
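Usage is bracket-style around the code that must make forward progress
(a sketch; the oom-reaper call site lands later in this series, and
my_ctx/my_notifier_call are placeholder names):

    static void reap_one(struct my_ctx *ctx)
    {
    	non_block_start();
    	/*
    	 * Any schedule() from a blocking primitive in here (mutex_lock(),
    	 * wait_event(), ...) now triggers "BUG: scheduling in a
    	 * non-blocking section" under CONFIG_DEBUG_ATOMIC_SLEEP. Being
    	 * preempted while still runnable stays fine, since the check
    	 * requires !preempt && prev->state.
    	 */
    	my_notifier_call(ctx);
    	non_block_end();
    }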