From ee7998c50c2697737c6530431709f77c852bf0d6 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 12 Jul 2017 14:34:04 -0700 Subject: random: do not ignore early device randomness The add_device_randomness() function would ignore incoming bytes if the crng wasn't ready. This additionally makes sure to make an early enough call to add_latent_entropy() to influence the initial stack canary, which is especially important on non-x86 systems where it stays the same through the life of the boot. Link: http://lkml.kernel.org/r/20170626233038.GA48751@beast Signed-off-by: Kees Cook Cc: "Theodore Ts'o" Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: Jessica Yu Cc: Steven Rostedt (VMware) Cc: Viresh Kumar Cc: Tejun Heo Cc: Prarit Bhargava Cc: Lokesh Vutla Cc: Nicholas Piggin Cc: AKASHI Takahiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/random.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers') diff --git a/drivers/char/random.c b/drivers/char/random.c index 01a260f67437..23cab7a8c1c1 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -987,6 +987,11 @@ void add_device_randomness(const void *buf, unsigned int size) unsigned long time = random_get_entropy() ^ jiffies; unsigned long flags; + if (!crng_ready()) { + crng_fast_load(buf, size); + return; + } + trace_add_device_randomness(size, _RET_IP_); spin_lock_irqsave(&input_pool.lock, flags); _mix_pool_bytes(&input_pool, buf, size); -- cgit v1.2.3-59-g8ed1b From 3e2c044a54e6b6373606f8ffad42a4a0759fcf3d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 12 Jul 2017 14:35:55 -0700 Subject: efi: avoid fortify checks in EFI stub This avoids CONFIG_FORTIFY_SOURCE from being enabled during the EFI stub build, as adding a panic() implementation may not work well. This can be adjusted in the future. Link: http://lkml.kernel.org/r/1497903987-21002-2-git-send-email-keescook@chromium.org Signed-off-by: Kees Cook Suggested-by: Daniel Micay Reviewed-by: Ard Biesheuvel Acked-by: Mark Rutland Cc: Matt Fleming Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/firmware/efi/libstub/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index f7425960f6a5..37e24f525162 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -17,6 +17,7 @@ cflags-$(CONFIG_ARM) := $(subst -pg,,$(KBUILD_CFLAGS)) \ cflags-$(CONFIG_EFI_ARMSTUB) += -I$(srctree)/scripts/dtc/libfdt KBUILD_CFLAGS := $(cflags-y) -DDISABLE_BRANCH_PROFILING \ + -D__NO_FORTIFY \ $(call cc-option,-ffreestanding) \ $(call cc-option,-fno-stack-protector) -- cgit v1.2.3-59-g8ed1b From 4c93496f18ce5044d78e4f7f9e018682a4f44b3d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 12 Jul 2017 14:36:01 -0700 Subject: IB/rxe: do not copy extra stack memory to skb This fixes a over-read condition detected by FORTIFY_SOURCE for this line: memcpy(SKB_TO_PKT(skb), &ack_pkt, sizeof(skb->cb)); The error was: In file included from ./include/linux/bitmap.h:8:0, from ./include/linux/cpumask.h:11, from ./include/linux/mm_types_task.h:13, from ./include/linux/mm_types.h:4, from ./include/linux/kmemcheck.h:4, from ./include/linux/skbuff.h:18, from drivers/infiniband/sw/rxe/rxe_resp.c:34: In function 'memcpy', inlined from 'send_atomic_ack.constprop' at drivers/infiniband/sw/rxe/rxe_resp.c:998:2, inlined from 'acknowledge' at drivers/infiniband/sw/rxe/rxe_resp.c:1026:3, inlined from 'rxe_responder' at drivers/infiniband/sw/rxe/rxe_resp.c:1286:10: ./include/linux/string.h:309:4: error: call to '__read_overflow2' declared with attribute error: detected read beyond size of object passed as 2nd parameter __read_overflow2(); Daniel Micay noted that struct rxe_pkt_info is 32 bytes on 32-bit architectures, but skb->cb is still 64. The memcpy() over-reads 32 bytes. This fixes it by zeroing the unused bytes in skb->cb. Link: http://lkml.kernel.org/r/1497903987-21002-5-git-send-email-keescook@chromium.org Signed-off-by: Kees Cook Cc: Moni Shoua Cc: Doug Ledford Cc: Sean Hefty Cc: Daniel Micay Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/infiniband/sw/rxe/rxe_resp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 23039768f541..be944d5aa9af 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -995,7 +995,9 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt, free_rd_atomic_resource(qp, res); rxe_advance_resp_resource(qp); - memcpy(SKB_TO_PKT(skb), &ack_pkt, sizeof(skb->cb)); + memcpy(SKB_TO_PKT(skb), &ack_pkt, sizeof(ack_pkt)); + memset((unsigned char *)SKB_TO_PKT(skb) + sizeof(ack_pkt), 0, + sizeof(skb->cb) - sizeof(ack_pkt)); res->type = RXE_ATOMIC_MASK; res->atomic.skb = skb; -- cgit v1.2.3-59-g8ed1b From dcda9b04713c3f6ff0875652924844fae28286ea Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 12 Jul 2017 14:36:45 -0700 Subject: mm, tree wide: replace __GFP_REPEAT by __GFP_RETRY_MAYFAIL with more useful semantic __GFP_REPEAT was designed to allow retry-but-eventually-fail semantic to the page allocator. This has been true but only for allocations requests larger than PAGE_ALLOC_COSTLY_ORDER. It has been always ignored for smaller sizes. This is a bit unfortunate because there is no way to express the same semantic for those requests and they are considered too important to fail so they might end up looping in the page allocator for ever, similarly to GFP_NOFAIL requests. Now that the whole tree has been cleaned up and accidental or misled usage of __GFP_REPEAT flag has been removed for !costly requests we can give the original flag a better name and more importantly a more useful semantic. Let's rename it to __GFP_RETRY_MAYFAIL which tells the user that the allocator would try really hard but there is no promise of a success. This will work independent of the order and overrides the default allocator behavior. Page allocator users have several levels of guarantee vs. cost options (take GFP_KERNEL as an example) - GFP_KERNEL & ~__GFP_RECLAIM - optimistic allocation without _any_ attempt to free memory at all. The most light weight mode which even doesn't kick the background reclaim. Should be used carefully because it might deplete the memory and the next user might hit the more aggressive reclaim - GFP_KERNEL & ~__GFP_DIRECT_RECLAIM (or GFP_NOWAIT)- optimistic allocation without any attempt to free memory from the current context but can wake kswapd to reclaim memory if the zone is below the low watermark. Can be used from either atomic contexts or when the request is a performance optimization and there is another fallback for a slow path. - (GFP_KERNEL|__GFP_HIGH) & ~__GFP_DIRECT_RECLAIM (aka GFP_ATOMIC) - non sleeping allocation with an expensive fallback so it can access some portion of memory reserves. Usually used from interrupt/bh context with an expensive slow path fallback. - GFP_KERNEL - both background and direct reclaim are allowed and the _default_ page allocator behavior is used. That means that !costly allocation requests are basically nofail but there is no guarantee of that behavior so failures have to be checked properly by callers (e.g. OOM killer victim is allowed to fail currently). - GFP_KERNEL | __GFP_NORETRY - overrides the default allocator behavior and all allocation requests fail early rather than cause disruptive reclaim (one round of reclaim in this implementation). The OOM killer is not invoked. - GFP_KERNEL | __GFP_RETRY_MAYFAIL - overrides the default allocator behavior and all allocation requests try really hard. The request will fail if the reclaim cannot make any progress. The OOM killer won't be triggered. - GFP_KERNEL | __GFP_NOFAIL - overrides the default allocator behavior and all allocation requests will loop endlessly until they succeed. This might be really dangerous especially for larger orders. Existing users of __GFP_REPEAT are changed to __GFP_RETRY_MAYFAIL because they already had their semantic. No new users are added. __alloc_pages_slowpath is changed to bail out for __GFP_RETRY_MAYFAIL if there is no progress and we have already passed the OOM point. This means that all the reclaim opportunities have been exhausted except the most disruptive one (the OOM killer) and a user defined fallback behavior is more sensible than keep retrying in the page allocator. [akpm@linux-foundation.org: fix arch/sparc/kernel/mdesc.c] [mhocko@suse.com: semantic fix] Link: http://lkml.kernel.org/r/20170626123847.GM11534@dhcp22.suse.cz [mhocko@kernel.org: address other thing spotted by Vlastimil] Link: http://lkml.kernel.org/r/20170626124233.GN11534@dhcp22.suse.cz Link: http://lkml.kernel.org/r/20170623085345.11304-3-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Alex Belits Cc: Chris Wilson Cc: Christoph Hellwig Cc: Darrick J. Wong Cc: David Daney Cc: Johannes Weiner Cc: Mel Gorman Cc: NeilBrown Cc: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/DMA-ISA-LPC.txt | 2 +- arch/powerpc/include/asm/book3s/64/pgalloc.h | 2 +- arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +- arch/sparc/kernel/mdesc.c | 2 +- drivers/mmc/host/wbsd.c | 2 +- drivers/s390/char/vmcp.c | 2 +- drivers/target/target_core_transport.c | 2 +- drivers/vhost/net.c | 2 +- drivers/vhost/scsi.c | 2 +- drivers/vhost/vsock.c | 2 +- include/linux/gfp.h | 56 +++++++++++++++++++++------- include/linux/slab.h | 3 +- include/trace/events/mmflags.h | 2 +- mm/hugetlb.c | 4 +- mm/internal.h | 2 +- mm/page_alloc.c | 14 +++++-- mm/sparse-vmemmap.c | 4 +- mm/util.c | 6 +-- mm/vmalloc.c | 2 +- mm/vmscan.c | 8 ++-- net/core/dev.c | 6 +-- net/core/skbuff.c | 2 +- net/sched/sch_fq.c | 2 +- tools/perf/builtin-kmem.c | 2 +- 24 files changed, 86 insertions(+), 47 deletions(-) (limited to 'drivers') diff --git a/Documentation/DMA-ISA-LPC.txt b/Documentation/DMA-ISA-LPC.txt index c41331398752..7a065ac4a9d1 100644 --- a/Documentation/DMA-ISA-LPC.txt +++ b/Documentation/DMA-ISA-LPC.txt @@ -42,7 +42,7 @@ requirements you pass the flag GFP_DMA to kmalloc. Unfortunately the memory available for ISA DMA is scarce so unless you allocate the memory during boot-up it's a good idea to also pass -__GFP_REPEAT and __GFP_NOWARN to make the allocator try a bit harder. +__GFP_RETRY_MAYFAIL and __GFP_NOWARN to make the allocator try a bit harder. (This scarcity also means that you should allocate the buffer as early as possible and not release it until the driver is unloaded.) diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index 20b1485ff1e8..e2329db9d6f4 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -56,7 +56,7 @@ static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm) return (pgd_t *)__get_free_page(pgtable_gfp_flags(mm, PGALLOC_GFP)); #else struct page *page; - page = alloc_pages(pgtable_gfp_flags(mm, PGALLOC_GFP | __GFP_REPEAT), + page = alloc_pages(pgtable_gfp_flags(mm, PGALLOC_GFP | __GFP_RETRY_MAYFAIL), 4); if (!page) return NULL; diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 710e491206ed..8cb0190e2a73 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -93,7 +93,7 @@ int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order) } if (!hpt) - hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT + hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_RETRY_MAYFAIL |__GFP_NOWARN, order - PAGE_SHIFT); if (!hpt) diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index e4b4e790bf89..fa466ce45bc9 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c @@ -205,7 +205,7 @@ static struct mdesc_handle *mdesc_kmalloc(unsigned int mdesc_size) handle_size = (sizeof(struct mdesc_handle) - sizeof(struct mdesc_hdr) + mdesc_size); - base = kmalloc(handle_size + 15, GFP_KERNEL | __GFP_REPEAT); + base = kmalloc(handle_size + 15, GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (!base) return NULL; diff --git a/drivers/mmc/host/wbsd.c b/drivers/mmc/host/wbsd.c index e15a9733fcfd..9668616faf16 100644 --- a/drivers/mmc/host/wbsd.c +++ b/drivers/mmc/host/wbsd.c @@ -1386,7 +1386,7 @@ static void wbsd_request_dma(struct wbsd_host *host, int dma) * order for ISA to be able to DMA to it. */ host->dma_buffer = kmalloc(WBSD_DMA_SIZE, - GFP_NOIO | GFP_DMA | __GFP_REPEAT | __GFP_NOWARN); + GFP_NOIO | GFP_DMA | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (!host->dma_buffer) goto free; diff --git a/drivers/s390/char/vmcp.c b/drivers/s390/char/vmcp.c index 65f5a794f26d..98749fa817da 100644 --- a/drivers/s390/char/vmcp.c +++ b/drivers/s390/char/vmcp.c @@ -98,7 +98,7 @@ vmcp_write(struct file *file, const char __user *buff, size_t count, } if (!session->response) session->response = (char *)__get_free_pages(GFP_KERNEL - | __GFP_REPEAT | GFP_DMA, + | __GFP_RETRY_MAYFAIL | GFP_DMA, get_order(session->bufsize)); if (!session->response) { mutex_unlock(&session->mutex); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index f1b3a46bdcaf..1bdc10651bcd 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -252,7 +252,7 @@ int transport_alloc_session_tags(struct se_session *se_sess, int rc; se_sess->sess_cmd_map = kzalloc(tag_num * tag_size, - GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); + GFP_KERNEL | __GFP_NOWARN | __GFP_RETRY_MAYFAIL); if (!se_sess->sess_cmd_map) { se_sess->sess_cmd_map = vzalloc(tag_num * tag_size); if (!se_sess->sess_cmd_map) { diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index e3d7ea1288c6..06d044862e58 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -897,7 +897,7 @@ static int vhost_net_open(struct inode *inode, struct file *f) struct sk_buff **queue; int i; - n = kvmalloc(sizeof *n, GFP_KERNEL | __GFP_REPEAT); + n = kvmalloc(sizeof *n, GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (!n) return -ENOMEM; vqs = kmalloc(VHOST_NET_VQ_MAX * sizeof(*vqs), GFP_KERNEL); diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index fd6c8b66f06f..ff02a942c4d5 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1404,7 +1404,7 @@ static int vhost_scsi_open(struct inode *inode, struct file *f) struct vhost_virtqueue **vqs; int r = -ENOMEM, i; - vs = kzalloc(sizeof(*vs), GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); + vs = kzalloc(sizeof(*vs), GFP_KERNEL | __GFP_NOWARN | __GFP_RETRY_MAYFAIL); if (!vs) { vs = vzalloc(sizeof(*vs)); if (!vs) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 3f63e03de8e8..c9de9c41aa97 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -508,7 +508,7 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file) /* This struct is large and allocation could fail, fall back to vmalloc * if there is no other way. */ - vsock = kvmalloc(sizeof(*vsock), GFP_KERNEL | __GFP_REPEAT); + vsock = kvmalloc(sizeof(*vsock), GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (!vsock) return -ENOMEM; diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 4c6656f1fee7..bcfb9f7c46f5 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -25,7 +25,7 @@ struct vm_area_struct; #define ___GFP_FS 0x80u #define ___GFP_COLD 0x100u #define ___GFP_NOWARN 0x200u -#define ___GFP_REPEAT 0x400u +#define ___GFP_RETRY_MAYFAIL 0x400u #define ___GFP_NOFAIL 0x800u #define ___GFP_NORETRY 0x1000u #define ___GFP_MEMALLOC 0x2000u @@ -136,26 +136,56 @@ struct vm_area_struct; * * __GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim. * - * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt - * _might_ fail. This depends upon the particular VM implementation. + * The default allocator behavior depends on the request size. We have a concept + * of so called costly allocations (with order > PAGE_ALLOC_COSTLY_ORDER). + * !costly allocations are too essential to fail so they are implicitly + * non-failing by default (with some exceptions like OOM victims might fail so + * the caller still has to check for failures) while costly requests try to be + * not disruptive and back off even without invoking the OOM killer. + * The following three modifiers might be used to override some of these + * implicit rules + * + * __GFP_NORETRY: The VM implementation will try only very lightweight + * memory direct reclaim to get some memory under memory pressure (thus + * it can sleep). It will avoid disruptive actions like OOM killer. The + * caller must handle the failure which is quite likely to happen under + * heavy memory pressure. The flag is suitable when failure can easily be + * handled at small cost, such as reduced throughput + * + * __GFP_RETRY_MAYFAIL: The VM implementation will retry memory reclaim + * procedures that have previously failed if there is some indication + * that progress has been made else where. It can wait for other + * tasks to attempt high level approaches to freeing memory such as + * compaction (which removes fragmentation) and page-out. + * There is still a definite limit to the number of retries, but it is + * a larger limit than with __GFP_NORETRY. + * Allocations with this flag may fail, but only when there is + * genuinely little unused memory. While these allocations do not + * directly trigger the OOM killer, their failure indicates that + * the system is likely to need to use the OOM killer soon. The + * caller must handle failure, but can reasonably do so by failing + * a higher-level request, or completing it only in a much less + * efficient manner. + * If the allocation does fail, and the caller is in a position to + * free some non-essential memory, doing so could benefit the system + * as a whole. * * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller - * cannot handle allocation failures. New users should be evaluated carefully - * (and the flag should be used only when there is no reasonable failure - * policy) but it is definitely preferable to use the flag rather than - * opencode endless loop around allocator. - * - * __GFP_NORETRY: The VM implementation must not retry indefinitely and will - * return NULL when direct reclaim and memory compaction have failed to allow - * the allocation to succeed. The OOM killer is not called with the current - * implementation. + * cannot handle allocation failures. The allocation could block + * indefinitely but will never return with failure. Testing for + * failure is pointless. + * New users should be evaluated carefully (and the flag should be + * used only when there is no reasonable failure policy) but it is + * definitely preferable to use the flag rather than opencode endless + * loop around allocator. + * Using this flag for costly allocations is _highly_ discouraged. */ #define __GFP_IO ((__force gfp_t)___GFP_IO) #define __GFP_FS ((__force gfp_t)___GFP_FS) #define __GFP_DIRECT_RECLAIM ((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */ #define __GFP_KSWAPD_RECLAIM ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */ #define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM)) -#define __GFP_REPEAT ((__force gfp_t)___GFP_REPEAT) +#define __GFP_RETRY_MAYFAIL ((__force gfp_t)___GFP_RETRY_MAYFAIL) #define __GFP_NOFAIL ((__force gfp_t)___GFP_NOFAIL) #define __GFP_NORETRY ((__force gfp_t)___GFP_NORETRY) diff --git a/include/linux/slab.h b/include/linux/slab.h index 04a7f7993e67..41473df6dfb0 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -471,7 +471,8 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags) * * %__GFP_NOWARN - If allocation fails, don't issue any warnings. * - * %__GFP_REPEAT - If allocation fails initially, try once more before failing. + * %__GFP_RETRY_MAYFAIL - Try really hard to succeed the allocation but fail + * eventually. * * There are other flags available as well, but these are not intended * for general use, and so are not documented here. For a full list of diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 10e3663a75a6..8e50d01c645f 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -34,7 +34,7 @@ {(unsigned long)__GFP_FS, "__GFP_FS"}, \ {(unsigned long)__GFP_COLD, "__GFP_COLD"}, \ {(unsigned long)__GFP_NOWARN, "__GFP_NOWARN"}, \ - {(unsigned long)__GFP_REPEAT, "__GFP_REPEAT"}, \ + {(unsigned long)__GFP_RETRY_MAYFAIL, "__GFP_RETRY_MAYFAIL"}, \ {(unsigned long)__GFP_NOFAIL, "__GFP_NOFAIL"}, \ {(unsigned long)__GFP_NORETRY, "__GFP_NORETRY"}, \ {(unsigned long)__GFP_COMP, "__GFP_COMP"}, \ diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 1e516520433d..bc48ee783dd9 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1384,7 +1384,7 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid) page = __alloc_pages_node(nid, htlb_alloc_mask(h)|__GFP_COMP|__GFP_THISNODE| - __GFP_REPEAT|__GFP_NOWARN, + __GFP_RETRY_MAYFAIL|__GFP_NOWARN, huge_page_order(h)); if (page) { prep_new_huge_page(h, page, nid); @@ -1525,7 +1525,7 @@ static struct page *__hugetlb_alloc_buddy_huge_page(struct hstate *h, { int order = huge_page_order(h); - gfp_mask |= __GFP_COMP|__GFP_REPEAT|__GFP_NOWARN; + gfp_mask |= __GFP_COMP|__GFP_RETRY_MAYFAIL|__GFP_NOWARN; if (nid == NUMA_NO_NODE) nid = numa_mem_id(); return __alloc_pages_nodemask(gfp_mask, order, nid, nmask); diff --git a/mm/internal.h b/mm/internal.h index 0e4f558412fb..24d88f084705 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -23,7 +23,7 @@ * hints such as HIGHMEM usage. */ #define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\ - __GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\ + __GFP_NOWARN|__GFP_RETRY_MAYFAIL|__GFP_NOFAIL|\ __GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC|\ __GFP_ATOMIC) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 64b7d82a9b1a..6d30e914afb6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3284,6 +3284,14 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, /* The OOM killer will not help higher order allocs */ if (order > PAGE_ALLOC_COSTLY_ORDER) goto out; + /* + * We have already exhausted all our reclaim opportunities without any + * success so it is time to admit defeat. We will skip the OOM killer + * because it is very likely that the caller has a more reasonable + * fallback than shooting a random task. + */ + if (gfp_mask & __GFP_RETRY_MAYFAIL) + goto out; /* The OOM killer does not needlessly kill tasks for lowmem */ if (ac->high_zoneidx < ZONE_NORMAL) goto out; @@ -3413,7 +3421,7 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags, } /* - * !costly requests are much more important than __GFP_REPEAT + * !costly requests are much more important than __GFP_RETRY_MAYFAIL * costly ones because they are de facto nofail and invoke OOM * killer to move on while costly can fail and users are ready * to cope with that. 1/4 retries is rather arbitrary but we @@ -3920,9 +3928,9 @@ retry: /* * Do not retry costly high order allocations unless they are - * __GFP_REPEAT + * __GFP_RETRY_MAYFAIL */ - if (costly_order && !(gfp_mask & __GFP_REPEAT)) + if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL)) goto nopage; if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags, diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index a56c3989f773..c50b1a14d55e 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -56,11 +56,11 @@ void * __meminit vmemmap_alloc_block(unsigned long size, int node) if (node_state(node, N_HIGH_MEMORY)) page = alloc_pages_node( - node, GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT, + node, GFP_KERNEL | __GFP_ZERO | __GFP_RETRY_MAYFAIL, get_order(size)); else page = alloc_pages( - GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT, + GFP_KERNEL | __GFP_ZERO | __GFP_RETRY_MAYFAIL, get_order(size)); if (page) return page_address(page); diff --git a/mm/util.c b/mm/util.c index 26be6407abd7..6520f2d4a226 100644 --- a/mm/util.c +++ b/mm/util.c @@ -339,7 +339,7 @@ EXPORT_SYMBOL(vm_mmap); * Uses kmalloc to get the memory but if the allocation fails then falls back * to the vmalloc allocator. Use kvfree for freeing the memory. * - * Reclaim modifiers - __GFP_NORETRY and __GFP_NOFAIL are not supported. __GFP_REPEAT + * Reclaim modifiers - __GFP_NORETRY and __GFP_NOFAIL are not supported. __GFP_RETRY_MAYFAIL * is supported only for large (>32kB) allocations, and it should be used only if * kmalloc is preferable to the vmalloc fallback, due to visible performance drawbacks. * @@ -367,11 +367,11 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node) kmalloc_flags |= __GFP_NOWARN; /* - * We have to override __GFP_REPEAT by __GFP_NORETRY for !costly + * We have to override __GFP_RETRY_MAYFAIL by __GFP_NORETRY for !costly * requests because there is no other way to tell the allocator * that we want to fail rather than retry endlessly. */ - if (!(kmalloc_flags & __GFP_REPEAT) || + if (!(kmalloc_flags & __GFP_RETRY_MAYFAIL) || (size <= PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) kmalloc_flags |= __GFP_NORETRY; } diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 6016ab079e2b..8698c1c86c4d 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1795,7 +1795,7 @@ fail: * allocator with @gfp_mask flags. Map them into contiguous * kernel virtual space, using a pagetable protection of @prot. * - * Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_REPEAT + * Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_RETRY_MAYFAIL * and __GFP_NOFAIL are not supported * * Any use of gfp flags outside of GFP_KERNEL should be consulted diff --git a/mm/vmscan.c b/mm/vmscan.c index e9210f825219..a1af041930a6 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2506,18 +2506,18 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat, return false; /* Consider stopping depending on scan and reclaim activity */ - if (sc->gfp_mask & __GFP_REPEAT) { + if (sc->gfp_mask & __GFP_RETRY_MAYFAIL) { /* - * For __GFP_REPEAT allocations, stop reclaiming if the + * For __GFP_RETRY_MAYFAIL allocations, stop reclaiming if the * full LRU list has been scanned and we are still failing * to reclaim pages. This full LRU scan is potentially - * expensive but a __GFP_REPEAT caller really wants to succeed + * expensive but a __GFP_RETRY_MAYFAIL caller really wants to succeed */ if (!nr_reclaimed && !nr_scanned) return false; } else { /* - * For non-__GFP_REPEAT allocations which can presumably + * For non-__GFP_RETRY_MAYFAIL allocations which can presumably * fail without consequence, stop if we failed to reclaim * any pages from the last SWAP_CLUSTER_MAX number of * pages that were scanned. This will return to the diff --git a/net/core/dev.c b/net/core/dev.c index 02440518dd69..8515f8fe0460 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7384,7 +7384,7 @@ static int netif_alloc_rx_queues(struct net_device *dev) BUG_ON(count < 1); - rx = kvzalloc(sz, GFP_KERNEL | __GFP_REPEAT); + rx = kvzalloc(sz, GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (!rx) return -ENOMEM; @@ -7424,7 +7424,7 @@ static int netif_alloc_netdev_queues(struct net_device *dev) if (count < 1 || count > 0xffff) return -EINVAL; - tx = kvzalloc(sz, GFP_KERNEL | __GFP_REPEAT); + tx = kvzalloc(sz, GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (!tx) return -ENOMEM; @@ -7965,7 +7965,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, /* ensure 32-byte alignment of whole construct */ alloc_size += NETDEV_ALIGN - 1; - p = kvzalloc(alloc_size, GFP_KERNEL | __GFP_REPEAT); + p = kvzalloc(alloc_size, GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (!p) return NULL; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8b11341ed69a..f990eb8b30a9 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4747,7 +4747,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len, gfp_head = gfp_mask; if (gfp_head & __GFP_DIRECT_RECLAIM) - gfp_head |= __GFP_REPEAT; + gfp_head |= __GFP_RETRY_MAYFAIL; *errcode = -ENOBUFS; skb = alloc_skb(header_len, gfp_head); diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 147fde73a0f5..263d16e3219e 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -648,7 +648,7 @@ static int fq_resize(struct Qdisc *sch, u32 log) return 0; /* If XPS was setup, we can allocate memory on right NUMA node */ - array = kvmalloc_node(sizeof(struct rb_root) << log, GFP_KERNEL | __GFP_REPEAT, + array = kvmalloc_node(sizeof(struct rb_root) << log, GFP_KERNEL | __GFP_RETRY_MAYFAIL, netdev_queue_numa_node_read(sch->dev_queue)); if (!array) return -ENOMEM; diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 0a8a1c45af87..a1497c516d85 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -643,7 +643,7 @@ static const struct { { "__GFP_FS", "F" }, { "__GFP_COLD", "CO" }, { "__GFP_NOWARN", "NWR" }, - { "__GFP_REPEAT", "R" }, + { "__GFP_RETRY_MAYFAIL", "R" }, { "__GFP_NOFAIL", "NF" }, { "__GFP_NORETRY", "NR" }, { "__GFP_COMP", "C" }, -- cgit v1.2.3-59-g8ed1b From dbb329561ae9ccfb942b6ba330030a4654e8908e Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 12 Jul 2017 14:36:55 -0700 Subject: drm/i915: use __GFP_RETRY_MAYFAIL Commit 24f8e00a8a2e ("drm/i915: Prefer to report ENOMEM rather than incur the oom for gfx allocations") has tried to remove disruptive OOM killer because the userspace should be able to cope with allocation failures. At the time only __GFP_NORETRY could achieve that and it turned out that this would fail the allocations just too easily. So "drm/i915: Remove __GFP_NORETRY from our buffer allocator" removed it and hoped for a better solution. __GFP_RETRY_MAYFAIL is that solution. It will keep retrying the allocation until there is no more progress and we would go OOM. Instead we fail the allocation and let the caller to deal with it. Link: http://lkml.kernel.org/r/20170623085345.11304-6-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: Chris Wilson Cc: Alex Belits Cc: Christoph Hellwig Cc: Darrick J. Wong Cc: David Daney Cc: Johannes Weiner Cc: Mel Gorman Cc: NeilBrown Cc: Ralf Baechle Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpu/drm/i915/i915_gem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 7dcac3bfb771..969bac8404f1 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2434,8 +2434,9 @@ rebuild_st: * again with !__GFP_NORETRY. However, we still * want to fail this allocation rather than * trigger the out-of-memory killer and for - * this we want the future __GFP_MAYFAIL. + * this we want __GFP_RETRY_MAYFAIL. */ + gfp |= __GFP_RETRY_MAYFAIL; } } while (1); -- cgit v1.2.3-59-g8ed1b From e0710e510c59602e4fa80d784c946e02e8968523 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 12 Jul 2017 14:37:34 -0700 Subject: drivers: s390: move static and inline before return type Make the code like the rest of the kernel. Link: http://lkml.kernel.org/r/3f980cd89084ae09716353aba3171e4b3815e690.1499284835.git.joe@perches.com Signed-off-by: Joe Perches Cc: Julian Wiedmann Cc: Ursula Braun Cc: Martin Schwidefsky Cc: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/s390/net/ctcm_main.c | 2 +- drivers/s390/net/qeth_l3_main.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/net/ctcm_main.c b/drivers/s390/net/ctcm_main.c index 1563b1458e44..2ade6131a89f 100644 --- a/drivers/s390/net/ctcm_main.c +++ b/drivers/s390/net/ctcm_main.c @@ -1115,7 +1115,7 @@ static const struct net_device_ops ctcm_mpc_netdev_ops = { .ndo_start_xmit = ctcmpc_tx, }; -void static ctcm_dev_setup(struct net_device *dev) +static void ctcm_dev_setup(struct net_device *dev) { dev->type = ARPHRD_SLIP; dev->tx_queue_len = 100; diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 3062cde33a3d..8975cd321390 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2408,7 +2408,7 @@ static int qeth_l3_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) return rc; } -int inline qeth_l3_get_cast_type(struct qeth_card *card, struct sk_buff *skb) +inline int qeth_l3_get_cast_type(struct qeth_card *card, struct sk_buff *skb) { int cast_type = RTN_UNSPEC; struct neighbour *n = NULL; -- cgit v1.2.3-59-g8ed1b From a9e5bfdb9d786efbc6995edfe6902788527457ad Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 12 Jul 2017 14:37:37 -0700 Subject: drivers: tty: serial: move inline before return type Make the code like the rest of the kernel. Link: http://lkml.kernel.org/r/55d3e89d50bb03d603bfb28019fab07f48bdc714.1499284835.git.joe@perches.com Signed-off-by: Joe Perches Cc: Pat Gefre Cc: Greg Kroah-Hartman Cc: Jiri Slaby Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/tty/serial/ioc3_serial.c | 4 ++-- drivers/tty/serial/ioc4_serial.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/tty/serial/ioc3_serial.c b/drivers/tty/serial/ioc3_serial.c index 2a61dd6b4009..906ee770ff4a 100644 --- a/drivers/tty/serial/ioc3_serial.c +++ b/drivers/tty/serial/ioc3_serial.c @@ -377,7 +377,7 @@ static struct ioc3_port *get_ioc3_port(struct uart_port *the_port) * called per port from attach... * @port: port to initialize */ -static int inline port_init(struct ioc3_port *port) +static inline int port_init(struct ioc3_port *port) { uint32_t sio_cr; struct port_hooks *hooks = port->ip_hooks; @@ -1430,7 +1430,7 @@ static int receive_chars(struct uart_port *the_port) * @pending: interrupts to handle */ -static int inline +static inline int ioc3uart_intr_one(struct ioc3_submodule *is, struct ioc3_driver_data *idd, unsigned int pending) diff --git a/drivers/tty/serial/ioc4_serial.c b/drivers/tty/serial/ioc4_serial.c index f96bcf9bee25..43d7d32eb150 100644 --- a/drivers/tty/serial/ioc4_serial.c +++ b/drivers/tty/serial/ioc4_serial.c @@ -824,7 +824,7 @@ pending_intrs(struct ioc4_soft *soft, int type) * called per port from attach... * @port: port to initialize */ -static int inline port_init(struct ioc4_port *port) +static inline int port_init(struct ioc4_port *port) { uint32_t sio_cr; struct hooks *hooks = port->ip_hooks; @@ -1048,7 +1048,7 @@ static irqreturn_t ioc4_intr(int irq, void *arg) * IOC4 with serial ports in the system. * @idd: Master module data for this IOC4 */ -static int inline ioc4_attach_local(struct ioc4_driver_data *idd) +static inline int ioc4_attach_local(struct ioc4_driver_data *idd) { struct ioc4_port *port; struct ioc4_port *ports[IOC4_NUM_SERIAL_PORTS]; -- cgit v1.2.3-59-g8ed1b From 4abf87f41a6cedf7a780af63991d6bebec303be0 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 12 Jul 2017 14:37:40 -0700 Subject: USB: serial: safe_serial: move __inline__ before return type Make the code like the rest of the kernel. Also use inline instead of __inline__. Link: http://lkml.kernel.org/r/a5072b74b6c293e6ec93c4900482e9d3267f15b2.1499284835.git.joe@perches.com Signed-off-by: Joe Perches Cc: Johan Hovold Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/usb/serial/safe_serial.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/usb/serial/safe_serial.c b/drivers/usb/serial/safe_serial.c index 8a069aa154ed..27d7a7016298 100644 --- a/drivers/usb/serial/safe_serial.c +++ b/drivers/usb/serial/safe_serial.c @@ -180,7 +180,7 @@ static const __u16 crc10_table[256] = { * Perform a memcpy and calculate fcs using ppp 10bit CRC algorithm. Return * new 10 bit FCS. */ -static __u16 __inline__ fcs_compute10(unsigned char *sp, int len, __u16 fcs) +static inline __u16 fcs_compute10(unsigned char *sp, int len, __u16 fcs) { for (; len-- > 0; fcs = CRC10_FCS(fcs, *sp++)); return fcs; -- cgit v1.2.3-59-g8ed1b From dce3944717523bfe65e2b3cbd1d3af0ccc27ace9 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 12 Jul 2017 14:37:43 -0700 Subject: video: fbdev: intelfb: move inline before return type Make the code like the rest of the kernel. But there is an oddity here because the inline should probably be removed. It's an extern function in intelfb.h and it is used in intelfbdrv.c and intelfbhw.c. The inline is kept here as I suppose it's possible for some compiler to make the uses inline in intelfbdrv and and also create an external function for intelfbhw. Link: http://lkml.kernel.org/r/8ba151a1fdc84e42cbf4aafc798513c0158edee1.1499284835.git.joe@perches.com Signed-off-by: Joe Perches Cc: Maik Broemme Cc: Bartlomiej Zolnierkiewicz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/fbdev/intelfb/intelfbdrv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/video/fbdev/intelfb/intelfbdrv.c b/drivers/video/fbdev/intelfb/intelfbdrv.c index 6b444400a86c..ffc391208b27 100644 --- a/drivers/video/fbdev/intelfb/intelfbdrv.c +++ b/drivers/video/fbdev/intelfb/intelfbdrv.c @@ -907,7 +907,7 @@ static void intelfb_pci_unregister(struct pci_dev *pdev) * helper functions * ***************************************************************/ -int __inline__ intelfb_var_to_depth(const struct fb_var_screeninfo *var) +__inline__ int intelfb_var_to_depth(const struct fb_var_screeninfo *var) { DBG_MSG("intelfb_var_to_depth: bpp: %d, green.length is %d\n", var->bits_per_pixel, var->green.length); -- cgit v1.2.3-59-g8ed1b From ba168a46b0b9e06f38f63ef9adf182950c82d6e0 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 12 Jul 2017 14:37:46 -0700 Subject: video: fbdev: omap: move inline before return type Make the code like the rest of the kernel. Link: http://lkml.kernel.org/r/bc5927726abc70d7c066df7ab4cb7cfce4a7b577.1499284835.git.joe@perches.com Signed-off-by: Joe Perches Cc: Tomi Valkeinen Cc: Bartlomiej Zolnierkiewicz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/fbdev/omap/lcdc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/video/fbdev/omap/lcdc.c b/drivers/video/fbdev/omap/lcdc.c index e3d9b9ea5498..938cba0d24ae 100644 --- a/drivers/video/fbdev/omap/lcdc.c +++ b/drivers/video/fbdev/omap/lcdc.c @@ -79,12 +79,12 @@ static struct omap_lcd_controller { unsigned long vram_size; } lcdc; -static void inline enable_irqs(int mask) +static inline void enable_irqs(int mask) { lcdc.irq_mask |= mask; } -static void inline disable_irqs(int mask) +static inline void disable_irqs(int mask) { lcdc.irq_mask &= ~mask; } @@ -466,7 +466,7 @@ static void calc_ck_div(int is_tft, int pck, int *pck_div) } } -static void inline setup_regs(void) +static inline void setup_regs(void) { u32 l; struct lcd_panel *panel = lcdc.fbdev->panel; -- cgit v1.2.3-59-g8ed1b