From b57e622e6da9048c96fa0ed6943834949a398e3f Mon Sep 17 00:00:00 2001 From: Souptick Joarder Date: Mon, 11 Mar 2019 23:28:10 -0700 Subject: mm/hmm: convert to use vm_fault_t MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert to use vm_fault_t type as return type for fault handler. kbuild reported warning during testing of *mm-create-the-new-vm_fault_t-type.patch* available in below link - https://patchwork.kernel.org/patch/10752741/ kernel/memremap.c:46:34: warning: incorrect type in return expression (different base types) kernel/memremap.c:46:34: expected restricted vm_fault_t kernel/memremap.c:46:34: got int This patch has fixed the warnings and also hmm_devmem_fault() is converted to return vm_fault_t to avoid further warnings. [sfr@canb.auug.org.au: drm/nouveau/dmem: update for struct hmm_devmem_ops member change] Link: http://lkml.kernel.org/r/20190220174407.753d94e5@canb.auug.org.au Link: http://lkml.kernel.org/r/20190110145900.GA1317@jordon-HP-15-Notebook-PC Signed-off-by: Souptick Joarder Signed-off-by: Stephen Rothwell Reviewed-by: Jérôme Glisse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hmm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 66f9ebbb1df3..ad50b7b4f141 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -468,7 +468,7 @@ struct hmm_devmem_ops { * Note that mmap semaphore is held in read mode at least when this * callback occurs, hence the vma is valid upon callback entry. */ - int (*fault)(struct hmm_devmem *devmem, + vm_fault_t (*fault)(struct hmm_devmem *devmem, struct vm_area_struct *vma, unsigned long addr, const struct page *page, @@ -511,7 +511,7 @@ struct hmm_devmem_ops { * chunk, as an optimization. It must, however, prioritize the faulting address * over all the others. */ -typedef int (*dev_page_fault_t)(struct vm_area_struct *vma, +typedef vm_fault_t (*dev_page_fault_t)(struct vm_area_struct *vma, unsigned long addr, const struct page *page, unsigned int flags, -- cgit v1.2.3-59-g8ed1b From b5420237ec817b0b5f729a674c81ace0865c3b3b Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 11 Mar 2019 23:28:13 -0700 Subject: mm: refactor readahead defines in mm.h All users of VM_MAX_READAHEAD actually convert it to kbytes and then to pages. Define the macro explicitly as (SZ_128K / PAGE_SIZE). This simplifies the expression in every filesystem. Also rename the macro to VM_READAHEAD_PAGES to properly convey its meaning. 
Finally remove unused VM_MIN_READAHEAD [akpm@linux-foundation.org: fix fs/io_uring.c, per Stephen] Link: http://lkml.kernel.org/r/20181221144053.24318-1-nborisov@suse.com Signed-off-by: Nikolay Borisov Reviewed-by: Matthew Wilcox Reviewed-by: David Hildenbrand Cc: Jens Axboe Cc: Eric Van Hensbergen Cc: Latchesar Ionkov Cc: Dominique Martinet Cc: David Howells Cc: Chris Mason Cc: Josef Bacik Cc: David Sterba Cc: Miklos Szeredi Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- block/blk-core.c | 3 +-- fs/9p/vfs_super.c | 2 +- fs/afs/super.c | 2 +- fs/btrfs/disk-io.c | 2 +- fs/fuse/inode.c | 2 +- fs/io_uring.c | 2 +- include/linux/mm.h | 4 ++-- 7 files changed, 8 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 6b78ec56a4f2..4673ebe42255 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -500,8 +500,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) if (!q->stats) goto fail_stats; - q->backing_dev_info->ra_pages = - (VM_MAX_READAHEAD * 1024) / PAGE_SIZE; + q->backing_dev_info->ra_pages = VM_READAHEAD_PAGES; q->backing_dev_info->capabilities = BDI_CAP_CGROUP_WRITEBACK; q->backing_dev_info->name = "block"; q->node = node_id; diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 48ce50484e80..10d3bd3f534b 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -92,7 +92,7 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, return ret; if (v9ses->cache) - sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024)/PAGE_SIZE; + sb->s_bdi->ra_pages = VM_READAHEAD_PAGES; sb->s_flags |= SB_ACTIVE | SB_DIRSYNC; if (!v9ses->cache) diff --git a/fs/afs/super.c b/fs/afs/super.c index dcd07fe99871..e684f6769b15 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -399,7 +399,7 @@ static int afs_fill_super(struct super_block *sb, ret = super_setup_bdi(sb); if (ret) return ret; - sb->s_bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE; + sb->s_bdi->ra_pages = VM_READAHEAD_PAGES; /* allocate the root inode and dentry */ if (as->dyn_root) { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f0cdb53f3e2d..6fe9197f6ee4 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2958,7 +2958,7 @@ int open_ctree(struct super_block *sb, sb->s_bdi->congested_fn = btrfs_congested_fn; sb->s_bdi->congested_data = fs_info; sb->s_bdi->capabilities |= BDI_CAP_CGROUP_WRITEBACK; - sb->s_bdi->ra_pages = VM_MAX_READAHEAD * SZ_1K / PAGE_SIZE; + sb->s_bdi->ra_pages = VM_READAHEAD_PAGES; sb->s_bdi->ra_pages *= btrfs_super_num_devices(disk_super); sb->s_bdi->ra_pages = max(sb->s_bdi->ra_pages, SZ_4M / PAGE_SIZE); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index c2d4099429be..16750ed591ae 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1010,7 +1010,7 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) if (err) return err; - sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_SIZE; + sb->s_bdi->ra_pages = VM_READAHEAD_PAGES; /* fuse does it's own writeback accounting */ sb->s_bdi->capabilities = BDI_CAP_NO_ACCT_WB | BDI_CAP_STRICTLIMIT; diff --git a/fs/io_uring.c b/fs/io_uring.c index 5d99376d2369..c88088d92613 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -923,7 +923,7 @@ static void io_async_list_note(int rw, struct io_kiocb *req, size_t len) /* Use 8x RA size as a decent limiter for both reads/writes */ max_pages = filp->f_ra.ra_pages; if (!max_pages) - max_pages = VM_MAX_READAHEAD >> (PAGE_SHIFT - 10); + max_pages = VM_READAHEAD_PAGES; 
max_pages *= 8; /* If max pages are exceeded, reset the state */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 5801ee849f36..76769749b5a5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -26,6 +26,7 @@ #include #include #include +#include struct mempolicy; struct anon_vma; @@ -2402,8 +2403,7 @@ int __must_check write_one_page(struct page *page); void task_dirty_inc(struct task_struct *tsk); /* readahead.c */ -#define VM_MAX_READAHEAD 128 /* kbytes */ -#define VM_MIN_READAHEAD 16 /* kbytes (includes current page) */ +#define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE) int force_page_cache_readahead(struct address_space *mapping, struct file *filp, pgoff_t offset, unsigned long nr_to_read); -- cgit v1.2.3-59-g8ed1b From 53d818d2747ca84f1a87a0006b903523cd5bf0cd Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 11 Mar 2019 23:29:11 -0700 Subject: memblock: drop memblock_alloc_base_nid() memblock_alloc_base_nid() is a oneliner wrapper for memblock_alloc_range_nid() without any side effect. Replace it's usage by the direct calls to memblock_alloc_range_nid(). Link: http://lkml.kernel.org/r/1548057848-15136-5-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Cc: Catalin Marinas Cc: Christophe Leroy Cc: Christoph Hellwig Cc: "David S. Miller" Cc: Dennis Zhou Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Kroah-Hartman Cc: Guan Xuetao Cc: Guo Ren Cc: Guo Ren [c-sky] Cc: Heiko Carstens Cc: Juergen Gross [Xen] Cc: Mark Salter Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Paul Burton Cc: Petr Mladek Cc: Richard Weinberger Cc: Rich Felker Cc: Rob Herring Cc: Rob Herring Cc: Russell King Cc: Stafford Horne Cc: Tony Luck Cc: Vineet Gupta Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 3 --- mm/memblock.c | 15 ++++----------- 2 files changed, 4 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 859b55b66db2..4db53f7c6b17 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -446,9 +446,6 @@ static inline bool memblock_bottom_up(void) phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align, phys_addr_t start, phys_addr_t end, enum memblock_flags flags); -phys_addr_t memblock_alloc_base_nid(phys_addr_t size, - phys_addr_t align, phys_addr_t max_addr, - int nid, enum memblock_flags flags); phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr); phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align, diff --git a/mm/memblock.c b/mm/memblock.c index 470601115892..e9e440cfd210 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1289,21 +1289,14 @@ phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align, flags); } -phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size, - phys_addr_t align, phys_addr_t max_addr, - int nid, enum memblock_flags flags) -{ - return memblock_alloc_range_nid(size, align, 0, max_addr, nid, flags); -} - phys_addr_t __init memblock_phys_alloc_nid(phys_addr_t size, phys_addr_t align, int nid) { enum memblock_flags flags = choose_memblock_flags(); phys_addr_t ret; again: - ret = memblock_alloc_base_nid(size, align, MEMBLOCK_ALLOC_ACCESSIBLE, - nid, flags); + ret = memblock_alloc_range_nid(size, align, 0, + MEMBLOCK_ALLOC_ACCESSIBLE, nid, flags); if (!ret && (flags & MEMBLOCK_MIRROR)) { flags &= ~MEMBLOCK_MIRROR; @@ -1314,8 +1307,8 @@ again: phys_addr_t __init 
__memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr) { - return memblock_alloc_base_nid(size, align, max_addr, NUMA_NO_NODE, - MEMBLOCK_NONE); + return memblock_alloc_range_nid(size, align, 0, max_addr, NUMA_NO_NODE, + MEMBLOCK_NONE); } phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr) -- cgit v1.2.3-59-g8ed1b From 8a770c2a83eaf4c3d493ca4056abd6d6ddce6f18 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 11 Mar 2019 23:29:16 -0700 Subject: memblock: emphasize that memblock_alloc_range() returns a physical address Rename memblock_alloc_range() to memblock_phys_alloc_range() to emphasize that it returns a physical address. While on it, remove the 'enum memblock_flags' parameter from this function as its only user anyway sets it to MEMBLOCK_NONE, which is the default for the most of memblock allocations. Link: http://lkml.kernel.org/r/1548057848-15136-6-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Cc: Catalin Marinas Cc: Christophe Leroy Cc: Christoph Hellwig Cc: "David S. Miller" Cc: Dennis Zhou Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Kroah-Hartman Cc: Guan Xuetao Cc: Guo Ren Cc: Guo Ren [c-sky] Cc: Heiko Carstens Cc: Juergen Gross [Xen] Cc: Mark Salter Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Paul Burton Cc: Petr Mladek Cc: Richard Weinberger Cc: Rich Felker Cc: Rob Herring Cc: Rob Herring Cc: Russell King Cc: Stafford Horne Cc: Tony Luck Cc: Vineet Gupta Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 5 ++--- mm/cma.c | 10 ++++------ mm/memblock.c | 9 +++++---- 3 files changed, 11 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 4db53f7c6b17..251cd66b151b 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -325,6 +325,8 @@ static inline int memblock_get_region_node(const struct memblock_region *r) #define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL #endif +phys_addr_t memblock_phys_alloc_range(phys_addr_t size, phys_addr_t align, + phys_addr_t start, phys_addr_t end); phys_addr_t memblock_phys_alloc_nid(phys_addr_t size, phys_addr_t align, int nid); phys_addr_t memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid); @@ -443,9 +445,6 @@ static inline bool memblock_bottom_up(void) return memblock.bottom_up; } -phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align, - phys_addr_t start, phys_addr_t end, - enum memblock_flags flags); phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr); phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align, diff --git a/mm/cma.c b/mm/cma.c index f4f3a8a57d86..bb2d333ffcb3 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -327,16 +327,14 @@ int __init cma_declare_contiguous(phys_addr_t base, * memory in case of failure. 
*/ if (base < highmem_start && limit > highmem_start) { - addr = memblock_alloc_range(size, alignment, - highmem_start, limit, - MEMBLOCK_NONE); + addr = memblock_phys_alloc_range(size, alignment, + highmem_start, limit); limit = highmem_start; } if (!addr) { - addr = memblock_alloc_range(size, alignment, base, - limit, - MEMBLOCK_NONE); + addr = memblock_phys_alloc_range(size, alignment, base, + limit); if (!addr) { ret = -ENOMEM; goto err; diff --git a/mm/memblock.c b/mm/memblock.c index e9e440cfd210..eb785ea6757b 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1281,12 +1281,13 @@ static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size, return 0; } -phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align, - phys_addr_t start, phys_addr_t end, - enum memblock_flags flags) +phys_addr_t __init memblock_phys_alloc_range(phys_addr_t size, + phys_addr_t align, + phys_addr_t start, + phys_addr_t end) { return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE, - flags); + MEMBLOCK_NONE); } phys_addr_t __init memblock_phys_alloc_nid(phys_addr_t size, phys_addr_t align, int nid) -- cgit v1.2.3-59-g8ed1b From ecc3e771f4ca98c52a072e41804434b4979bdf84 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 11 Mar 2019 23:29:26 -0700 Subject: memblock: memblock_phys_alloc(): don't panic Make the memblock_phys_alloc() function an inline wrapper for memblock_phys_alloc_range() and update the memblock_phys_alloc() callers to check the returned value and panic in case of error. Link: http://lkml.kernel.org/r/1548057848-15136-8-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Cc: Catalin Marinas Cc: Christophe Leroy Cc: Christoph Hellwig Cc: "David S. Miller" Cc: Dennis Zhou Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Kroah-Hartman Cc: Guan Xuetao Cc: Guo Ren Cc: Guo Ren [c-sky] Cc: Heiko Carstens Cc: Juergen Gross [Xen] Cc: Mark Salter Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Paul Burton Cc: Petr Mladek Cc: Richard Weinberger Cc: Rich Felker Cc: Rob Herring Cc: Rob Herring Cc: Russell King Cc: Stafford Horne Cc: Tony Luck Cc: Vineet Gupta Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/init.c | 4 ++++ arch/arm64/mm/mmu.c | 2 ++ arch/powerpc/sysdev/dart_iommu.c | 3 +++ arch/s390/kernel/crash_dump.c | 3 +++ arch/s390/kernel/setup.c | 3 +++ arch/sh/boards/mach-ap325rxa/setup.c | 3 +++ arch/sh/boards/mach-ecovec24/setup.c | 6 ++++++ arch/sh/boards/mach-kfr2r09/setup.c | 3 +++ arch/sh/boards/mach-migor/setup.c | 3 +++ arch/sh/boards/mach-se/7724/setup.c | 6 ++++++ arch/xtensa/mm/kasan_init.c | 3 +++ include/linux/memblock.h | 7 ++++++- mm/memblock.c | 5 ----- 13 files changed, 45 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index b76b90eb9356..15dddfe43319 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -206,6 +206,10 @@ phys_addr_t __init arm_memblock_steal(phys_addr_t size, phys_addr_t align) BUG_ON(!arm_memblock_steal_permitted); phys = memblock_phys_alloc(size, align); + if (!phys) + panic("Failed to steal %pa bytes at %pS\n", + &size, (void *)_RET_IP_); + memblock_free(phys, size); memblock_remove(phys, size); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 402b6495ff58..e97f018ff740 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -103,6 +103,8 @@ static phys_addr_t __init early_pgtable_alloc(void) void *ptr; phys = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); + if 
(!phys) + panic("Failed to allocate page table page\n"); /* * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index fc5c5c23303e..2a751795ec1e 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -265,6 +265,9 @@ static void allocate_dart(void) * prefetching into invalid pages and corrupting data */ tmp = memblock_phys_alloc(DART_PAGE_SIZE, DART_PAGE_SIZE); + if (!tmp) + panic("DART: table allocation failed\n"); + dart_emptyval = DARTMAP_VALID | ((tmp >> DART_PAGE_SHIFT) & DARTMAP_RPNMASK); diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 97eae3871868..f96a5857bbfd 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -61,6 +61,9 @@ struct save_area * __init save_area_alloc(bool is_boot_cpu) struct save_area *sa; sa = (void *) memblock_phys_alloc(sizeof(*sa), 8); + if (!sa) + panic("Failed to allocate save area\n"); + if (is_boot_cpu) list_add(&sa->list, &dump_save_areas); else diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 12934e8fbb91..d7920f3e76c6 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -966,6 +966,9 @@ static void __init setup_randomness(void) vmms = (struct sysinfo_3_2_2 *) memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); + if (!vmms) + panic("Failed to allocate memory for sysinfo structure\n"); + if (stsi(vmms, 3, 2, 2) == 0 && vmms->count) add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count); memblock_free((unsigned long) vmms, PAGE_SIZE); diff --git a/arch/sh/boards/mach-ap325rxa/setup.c b/arch/sh/boards/mach-ap325rxa/setup.c index 97774424fbee..8301a4378f50 100644 --- a/arch/sh/boards/mach-ap325rxa/setup.c +++ b/arch/sh/boards/mach-ap325rxa/setup.c @@ -557,6 +557,9 @@ static void __init ap325rxa_mv_mem_reserve(void) phys_addr_t size = CEU_BUFFER_MEMORY_SIZE; phys = memblock_phys_alloc(size, PAGE_SIZE); + if (!phys) + panic("Failed to allocate CEU memory\n"); + memblock_free(phys, size); memblock_remove(phys, size); diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c index d329bf3be487..34e5414c5563 100644 --- a/arch/sh/boards/mach-ecovec24/setup.c +++ b/arch/sh/boards/mach-ecovec24/setup.c @@ -1477,11 +1477,17 @@ static void __init ecovec_mv_mem_reserve(void) phys_addr_t size = CEU_BUFFER_MEMORY_SIZE; phys = memblock_phys_alloc(size, PAGE_SIZE); + if (!phys) + panic("Failed to allocate CEU0 memory\n"); + memblock_free(phys, size); memblock_remove(phys, size); ceu0_dma_membase = phys; phys = memblock_phys_alloc(size, PAGE_SIZE); + if (!phys) + panic("Failed to allocate CEU1 memory\n"); + memblock_free(phys, size); memblock_remove(phys, size); ceu1_dma_membase = phys; diff --git a/arch/sh/boards/mach-kfr2r09/setup.c b/arch/sh/boards/mach-kfr2r09/setup.c index 5c258ae9c43a..1cf9a47ac90e 100644 --- a/arch/sh/boards/mach-kfr2r09/setup.c +++ b/arch/sh/boards/mach-kfr2r09/setup.c @@ -631,6 +631,9 @@ static void __init kfr2r09_mv_mem_reserve(void) phys_addr_t size = CEU_BUFFER_MEMORY_SIZE; phys = memblock_phys_alloc(size, PAGE_SIZE); + if (!phys) + panic("Failed to allocate CEU memory\n"); + memblock_free(phys, size); memblock_remove(phys, size); diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c index 193d91bb84bf..90702740f207 100644 --- a/arch/sh/boards/mach-migor/setup.c +++ b/arch/sh/boards/mach-migor/setup.c @@ -631,6 +631,9 @@ static void __init 
migor_mv_mem_reserve(void) phys_addr_t size = CEU_BUFFER_MEMORY_SIZE; phys = memblock_phys_alloc(size, PAGE_SIZE); + if (!phys) + panic("Failed to allocate CEU memory\n"); + memblock_free(phys, size); memblock_remove(phys, size); diff --git a/arch/sh/boards/mach-se/7724/setup.c b/arch/sh/boards/mach-se/7724/setup.c index 5c7aa37bfd86..3674064816c7 100644 --- a/arch/sh/boards/mach-se/7724/setup.c +++ b/arch/sh/boards/mach-se/7724/setup.c @@ -964,11 +964,17 @@ static void __init ms7724se_mv_mem_reserve(void) phys_addr_t size = CEU_BUFFER_MEMORY_SIZE; phys = memblock_phys_alloc(size, PAGE_SIZE); + if (!phys) + panic("Failed to allocate CEU0 memory\n"); + memblock_free(phys, size); memblock_remove(phys, size); ceu0_dma_membase = phys; phys = memblock_phys_alloc(size, PAGE_SIZE); + if (!phys) + panic("Failed to allocate CEU1 memory\n"); + memblock_free(phys, size); memblock_remove(phys, size); ceu1_dma_membase = phys; diff --git a/arch/xtensa/mm/kasan_init.c b/arch/xtensa/mm/kasan_init.c index 48dbb03f4f6f..4852848a0c28 100644 --- a/arch/xtensa/mm/kasan_init.c +++ b/arch/xtensa/mm/kasan_init.c @@ -54,6 +54,9 @@ static void __init populate(void *start, void *end) phys_addr_t phys = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); + if (!phys) + panic("Failed to allocate page table page\n"); + set_pte(pte + j, pfn_pte(PHYS_PFN(phys), PAGE_KERNEL)); } } diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 251cd66b151b..7caecb42bfea 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -330,7 +330,12 @@ phys_addr_t memblock_phys_alloc_range(phys_addr_t size, phys_addr_t align, phys_addr_t memblock_phys_alloc_nid(phys_addr_t size, phys_addr_t align, int nid); phys_addr_t memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid); -phys_addr_t memblock_phys_alloc(phys_addr_t size, phys_addr_t align); +static inline phys_addr_t memblock_phys_alloc(phys_addr_t size, + phys_addr_t align) +{ + return memblock_phys_alloc_range(size, align, 0, + MEMBLOCK_ALLOC_ACCESSIBLE); +} void *memblock_alloc_try_nid_raw(phys_addr_t size, phys_addr_t align, phys_addr_t min_addr, phys_addr_t max_addr, diff --git a/mm/memblock.c b/mm/memblock.c index ac57bd3082bb..d0b76bb7340d 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1325,11 +1325,6 @@ phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys return alloc; } -phys_addr_t __init memblock_phys_alloc(phys_addr_t size, phys_addr_t align) -{ - return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); -} - phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid) { phys_addr_t res = memblock_phys_alloc_nid(size, align, nid); -- cgit v1.2.3-59-g8ed1b From 42b46aeff2e366bad54bd1c069b7b5381d9be8b3 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 11 Mar 2019 23:29:31 -0700 Subject: memblock: drop __memblock_alloc_base() The __memblock_alloc_base() function tries to allocate a memory up to the limit specified by its max_addr parameter. Depending on the value of this parameter, the __memblock_alloc_base() can is replaced with the appropriate memblock_phys_alloc*() variant. Link: http://lkml.kernel.org/r/1548057848-15136-9-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Acked-by: Rob Herring Cc: Catalin Marinas Cc: Christophe Leroy Cc: Christoph Hellwig Cc: "David S. 
Miller" Cc: Dennis Zhou Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Kroah-Hartman Cc: Guan Xuetao Cc: Guo Ren Cc: Guo Ren [c-sky] Cc: Heiko Carstens Cc: Juergen Gross [Xen] Cc: Mark Salter Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Paul Burton Cc: Petr Mladek Cc: Richard Weinberger Cc: Rich Felker Cc: Rob Herring Cc: Russell King Cc: Stafford Horne Cc: Tony Luck Cc: Vineet Gupta Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sh/kernel/machine_kexec.c | 3 ++- arch/x86/kernel/e820.c | 2 +- arch/x86/mm/numa.c | 12 ++++-------- drivers/of/of_reserved_mem.c | 7 ++----- include/linux/memblock.h | 2 -- mm/memblock.c | 9 ++------- 6 files changed, 11 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/arch/sh/kernel/machine_kexec.c b/arch/sh/kernel/machine_kexec.c index b9f9f1a5afdc..63d63a36f6f2 100644 --- a/arch/sh/kernel/machine_kexec.c +++ b/arch/sh/kernel/machine_kexec.c @@ -168,7 +168,8 @@ void __init reserve_crashkernel(void) crash_size = PAGE_ALIGN(resource_size(&crashk_res)); if (!crashk_res.start) { unsigned long max = memblock_end_of_DRAM() - memory_limit; - crashk_res.start = __memblock_alloc_base(crash_size, PAGE_SIZE, max); + crashk_res.start = memblock_phys_alloc_range(crash_size, + PAGE_SIZE, 0, max); if (!crashk_res.start) { pr_err("crashkernel allocation failed\n"); goto disable; diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index a687d10da417..5203ee4e6435 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -775,7 +775,7 @@ u64 __init e820__memblock_alloc_reserved(u64 size, u64 align) { u64 addr; - addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); + addr = memblock_phys_alloc(size, align); if (addr) { e820__range_update_kexec(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); pr_info("update e820_table_kexec for e820__memblock_alloc_reserved()\n"); diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 12c1b7a83ed7..dfb6c4df639a 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -195,15 +195,11 @@ static void __init alloc_node_data(int nid) * Allocate node data. Try node-local memory and then any node. * Never allocate in DMA zone. */ - nd_pa = memblock_phys_alloc_nid(nd_size, SMP_CACHE_BYTES, nid); + nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid); if (!nd_pa) { - nd_pa = __memblock_alloc_base(nd_size, SMP_CACHE_BYTES, - MEMBLOCK_ALLOC_ACCESSIBLE); - if (!nd_pa) { - pr_err("Cannot find %zu bytes in any node (initial node: %d)\n", - nd_size, nid); - return; - } + pr_err("Cannot find %zu bytes in any node (initial node: %d)\n", + nd_size, nid); + return; } nd = __va(nd_pa); diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index e773063c6de9..8c07d7da5256 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -31,13 +31,10 @@ static int __init early_init_dt_alloc_reserved_memory_arch(phys_addr_t size, phys_addr_t *res_base) { phys_addr_t base; - /* - * We use __memblock_alloc_base() because memblock_alloc_base() - * panic()s on allocation failure. - */ + end = !end ? MEMBLOCK_ALLOC_ANYWHERE : end; align = !align ? 
SMP_CACHE_BYTES : align; - base = __memblock_alloc_base(size, align, end); + base = memblock_phys_alloc_range(size, align, 0, end); if (!base) return -ENOMEM; diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 7caecb42bfea..017aeb223b24 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -452,8 +452,6 @@ static inline bool memblock_bottom_up(void) phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr); -phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align, - phys_addr_t max_addr); phys_addr_t memblock_phys_mem_size(void); phys_addr_t memblock_reserved_size(void); phys_addr_t memblock_mem_size(unsigned long limit_pfn); diff --git a/mm/memblock.c b/mm/memblock.c index d0b76bb7340d..5b6aeb8108d9 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1306,17 +1306,12 @@ again: return ret; } -phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr) -{ - return memblock_alloc_range_nid(size, align, 0, max_addr, NUMA_NO_NODE, - MEMBLOCK_NONE); -} - phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr) { phys_addr_t alloc; - alloc = __memblock_alloc_base(size, align, max_addr); + alloc = memblock_alloc_range_nid(size, align, 0, max_addr, NUMA_NO_NODE, + MEMBLOCK_NONE); if (alloc == 0) panic("ERROR: Failed to allocate %pa bytes below %pa.\n", -- cgit v1.2.3-59-g8ed1b From 0ba9e6edd4c2e563a9b34c8a46649218814a363f Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 11 Mar 2019 23:29:35 -0700 Subject: memblock: drop memblock_alloc_base() The memblock_alloc_base() function tries to allocate a memory up to the limit specified by its max_addr parameter and panics if the allocation fails. Replace its usage with memblock_phys_alloc_range() and make the callers check the return value and panic in case of error. Link: http://lkml.kernel.org/r/1548057848-15136-10-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Acked-by: Michael Ellerman [powerpc] Cc: Catalin Marinas Cc: Christophe Leroy Cc: Christoph Hellwig Cc: "David S. 
Miller" Cc: Dennis Zhou Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Kroah-Hartman Cc: Guan Xuetao Cc: Guo Ren Cc: Guo Ren [c-sky] Cc: Heiko Carstens Cc: Juergen Gross [Xen] Cc: Mark Salter Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Paul Burton Cc: Petr Mladek Cc: Richard Weinberger Cc: Rich Felker Cc: Rob Herring Cc: Rob Herring Cc: Russell King Cc: Stafford Horne Cc: Tony Luck Cc: Vineet Gupta Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/rtas.c | 6 +++++- arch/powerpc/mm/hash_utils_64.c | 8 ++++++-- arch/s390/kernel/smp.c | 6 +++++- drivers/macintosh/smu.c | 2 +- include/linux/memblock.h | 2 -- mm/memblock.c | 14 -------------- 6 files changed, 17 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index de35bd8f047f..fbc676160adf 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1187,7 +1187,11 @@ void __init rtas_initialize(void) ibm_suspend_me_token = rtas_token("ibm,suspend-me"); } #endif - rtas_rmo_buf = memblock_alloc_base(RTAS_RMOBUF_MAX, PAGE_SIZE, rtas_region); + rtas_rmo_buf = memblock_phys_alloc_range(RTAS_RMOBUF_MAX, PAGE_SIZE, + 0, rtas_region); + if (!rtas_rmo_buf) + panic("ERROR: RTAS: Failed to allocate %lx bytes below %pa\n", + PAGE_SIZE, &rtas_region); #ifdef CONFIG_RTAS_ERROR_LOGGING rtas_last_error_token = rtas_token("rtas-last-error"); diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 3d4b2399192f..880a366c229c 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -882,8 +882,12 @@ static void __init htab_initialize(void) } #endif /* CONFIG_PPC_CELL */ - table = memblock_alloc_base(htab_size_bytes, htab_size_bytes, - limit); + table = memblock_phys_alloc_range(htab_size_bytes, + htab_size_bytes, + 0, limit); + if (!table) + panic("ERROR: Failed to allocate %pa bytes below %pa\n", + &htab_size_bytes, &limit); DBG("Hash table allocated at %lx, size: %lx\n", table, htab_size_bytes); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index b198ece2aad6..5e3cccc408b8 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -656,7 +656,11 @@ void __init smp_save_dump_cpus(void) /* No previous system present, normal boot. */ return; /* Allocate a page as dumping area for the store status sigps */ - page = memblock_alloc_base(PAGE_SIZE, PAGE_SIZE, 1UL << 31); + page = memblock_phys_alloc_range(PAGE_SIZE, PAGE_SIZE, 0, 1UL << 31); + if (!page) + panic("ERROR: Failed to allocate %x bytes below %lx\n", + PAGE_SIZE, 1UL << 31); + /* Set multi-threading state to the previous system. */ pcpu_set_smt(sclp.mtid_prev); boot_cpu_addr = stap(); diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c index 0a0b8e1f4236..42cf68d15da3 100644 --- a/drivers/macintosh/smu.c +++ b/drivers/macintosh/smu.c @@ -485,7 +485,7 @@ int __init smu_init (void) * SMU based G5s need some memory below 2Gb. Thankfully this is * called at a time where memblock is still available. 
*/ - smu_cmdbuf_abs = memblock_alloc_base(4096, 4096, 0x80000000UL); + smu_cmdbuf_abs = memblock_phys_alloc_range(4096, 4096, 0, 0x80000000UL); if (smu_cmdbuf_abs == 0) { printk(KERN_ERR "SMU: Command buffer allocation failed !\n"); ret = -EINVAL; diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 017aeb223b24..0c8375120322 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -450,8 +450,6 @@ static inline bool memblock_bottom_up(void) return memblock.bottom_up; } -phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align, - phys_addr_t max_addr); phys_addr_t memblock_phys_mem_size(void); phys_addr_t memblock_reserved_size(void); phys_addr_t memblock_mem_size(unsigned long limit_pfn); diff --git a/mm/memblock.c b/mm/memblock.c index 5b6aeb8108d9..42fe65447d8b 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1306,20 +1306,6 @@ again: return ret; } -phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr) -{ - phys_addr_t alloc; - - alloc = memblock_alloc_range_nid(size, align, 0, max_addr, NUMA_NO_NODE, - MEMBLOCK_NONE); - - if (alloc == 0) - panic("ERROR: Failed to allocate %pa bytes below %pa.\n", - &size, &max_addr); - - return alloc; -} - phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid) { phys_addr_t res = memblock_phys_alloc_nid(size, align, nid); -- cgit v1.2.3-59-g8ed1b From 92d12f9544b7b133b54cb64f687f3f45fce0043c Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 11 Mar 2019 23:29:41 -0700 Subject: memblock: refactor internal allocation functions Currently, memblock has several internal functions with overlapping functionality. They all call memblock_find_in_range_node() to find free memory and then reserve the allocated range and mark it with kmemleak. However, there is difference in the allocation constraints and in fallback strategies. The allocations returning physical address first attempt to find free memory on the specified node within mirrored memory regions, then retry on the same node without the requirement for memory mirroring and finally fall back to all available memory. The allocations returning virtual address start with clamping the allowed range to memblock.current_limit, attempt to allocate from the specified node from regions with mirroring and with user defined minimal address. If such allocation fails, next attempt is done with node restriction lifted. Next, the allocation is retried with minimal address reset to zero and at last without the requirement for mirrored regions. Let's consolidate various fallbacks handling and make them more consistent for physical and virtual variants. Most of the fallback handling is moved to memblock_alloc_range_nid() and it now handles node and mirror fallbacks. The memblock_alloc_internal() uses memblock_alloc_range_nid() to get a physical address of the allocated range and converts it to virtual address. The fallback for allocation below the specified minimal address remains in memblock_alloc_internal() because memblock_alloc_range_nid() is used by CMA with exact requirement for lower bounds. The memblock_phys_alloc_nid() function is completely dropped as it is not used anywhere outside memblock and its only usage can be replaced by a call to memblock_alloc_range_nid(). 
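(Editorial illustration, not part of the patch: the sketch below shows a hypothetical early-boot caller relying on the consolidated fallback order described above, that is node-local mirrored memory first, then any node, then any memory without the mirroring requirement. The helper name alloc_node_scratch() is invented for the example; the memblock_phys_alloc_try_nid() signature is the post-patch one visible in the diff that follows, and because none of these variants panic, the caller checks the result itself.)

	/* Hypothetical caller; a minimal sketch of the behaviour described above. */
	static phys_addr_t __init alloc_node_scratch(int nid, phys_addr_t size)
	{
		phys_addr_t pa;

		/*
		 * memblock_phys_alloc_try_nid() forwards to
		 * memblock_alloc_range_nid(), which handles the node and
		 * mirroring fallbacks internally and returns 0 on failure.
		 */
		pa = memblock_phys_alloc_try_nid(size, SMP_CACHE_BYTES, nid);
		if (!pa)
			panic("Failed to allocate %pa bytes for node %d\n",
			      &size, nid);
		return pa;
	}
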
[rppt@linux.ibm.com: fix parameter order in memblock_phys_alloc_try_nid()] Link: http://lkml.kernel.org/r/20190203113915.GC8620@rapoport-lnx Link: http://lkml.kernel.org/r/1548057848-15136-11-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Tested-by: Michael Ellerman Cc: Catalin Marinas Cc: Christophe Leroy Cc: Christoph Hellwig Cc: "David S. Miller" Cc: Dennis Zhou Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Kroah-Hartman Cc: Guan Xuetao Cc: Guo Ren Cc: Guo Ren [c-sky] Cc: Heiko Carstens Cc: Juergen Gross [Xen] Cc: Mark Salter Cc: Matt Turner Cc: Max Filippov Cc: Michal Simek Cc: Paul Burton Cc: Petr Mladek Cc: Richard Weinberger Cc: Rich Felker Cc: Rob Herring Cc: Rob Herring Cc: Russell King Cc: Stafford Horne Cc: Tony Luck Cc: Vineet Gupta Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 1 - mm/memblock.c | 171 +++++++++++++++++++++-------------------------- 2 files changed, 77 insertions(+), 95 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 0c8375120322..c1315c331a8e 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -327,7 +327,6 @@ static inline int memblock_get_region_node(const struct memblock_region *r) phys_addr_t memblock_phys_alloc_range(phys_addr_t size, phys_addr_t align, phys_addr_t start, phys_addr_t end); -phys_addr_t memblock_phys_alloc_nid(phys_addr_t size, phys_addr_t align, int nid); phys_addr_t memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid); static inline phys_addr_t memblock_phys_alloc(phys_addr_t size, diff --git a/mm/memblock.c b/mm/memblock.c index 42fe65447d8b..31e89dac9a23 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1255,30 +1255,84 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size, } #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ +/** + * memblock_alloc_range_nid - allocate boot memory block + * @size: size of memory block to be allocated in bytes + * @align: alignment of the region and block's size + * @start: the lower bound of the memory region to allocate (phys address) + * @end: the upper bound of the memory region to allocate (phys address) + * @nid: nid of the free area to find, %NUMA_NO_NODE for any node + * + * The allocation is performed from memory region limited by + * memblock.current_limit if @max_addr == %MEMBLOCK_ALLOC_ACCESSIBLE. + * + * If the specified node can not hold the requested memory the + * allocation falls back to any node in the system + * + * For systems with memory mirroring, the allocation is attempted first + * from the regions with mirroring enabled and then retried from any + * memory region. + * + * In addition, function sets the min_count to 0 using kmemleak_alloc_phys for + * allocated boot memory block, so that it is never reported as leaks. + * + * Return: + * Physical address of allocated memory block on success, %0 on failure. + */ static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size, phys_addr_t align, phys_addr_t start, - phys_addr_t end, int nid, - enum memblock_flags flags) + phys_addr_t end, int nid) { + enum memblock_flags flags = choose_memblock_flags(); phys_addr_t found; + if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. 
Use NUMA_NO_NODE instead\n")) + nid = NUMA_NO_NODE; + if (!align) { /* Can't use WARNs this early in boot on powerpc */ dump_stack(); align = SMP_CACHE_BYTES; } + if (end > memblock.current_limit) + end = memblock.current_limit; + +again: found = memblock_find_in_range_node(size, align, start, end, nid, flags); - if (found && !memblock_reserve(found, size)) { + if (found && !memblock_reserve(found, size)) + goto done; + + if (nid != NUMA_NO_NODE) { + found = memblock_find_in_range_node(size, align, start, + end, NUMA_NO_NODE, + flags); + if (found && !memblock_reserve(found, size)) + goto done; + } + + if (flags & MEMBLOCK_MIRROR) { + flags &= ~MEMBLOCK_MIRROR; + pr_warn("Could not allocate %pap bytes of mirrored memory\n", + &size); + goto again; + } + + return 0; + +done: + /* Skip kmemleak for kasan_init() due to high volume. */ + if (end != MEMBLOCK_ALLOC_KASAN) /* - * The min_count is set to 0 so that memblock allocations are - * never reported as leaks. + * The min_count is set to 0 so that memblock allocated + * blocks are never reported as leaks. This is because many + * of these blocks are only referred via the physical + * address which is not looked up by kmemleak. */ kmemleak_alloc_phys(found, size, 0, 0); - return found; - } - return 0; + + return found; } phys_addr_t __init memblock_phys_alloc_range(phys_addr_t size, @@ -1286,35 +1340,13 @@ phys_addr_t __init memblock_phys_alloc_range(phys_addr_t size, phys_addr_t start, phys_addr_t end) { - return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE, - MEMBLOCK_NONE); -} - -phys_addr_t __init memblock_phys_alloc_nid(phys_addr_t size, phys_addr_t align, int nid) -{ - enum memblock_flags flags = choose_memblock_flags(); - phys_addr_t ret; - -again: - ret = memblock_alloc_range_nid(size, align, 0, - MEMBLOCK_ALLOC_ACCESSIBLE, nid, flags); - - if (!ret && (flags & MEMBLOCK_MIRROR)) { - flags &= ~MEMBLOCK_MIRROR; - goto again; - } - return ret; + return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE); } phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid) { - phys_addr_t res = memblock_phys_alloc_nid(size, align, nid); - - if (res) - return res; return memblock_alloc_range_nid(size, align, 0, - MEMBLOCK_ALLOC_ACCESSIBLE, - NUMA_NO_NODE, MEMBLOCK_NONE); + MEMBLOCK_ALLOC_ACCESSIBLE, nid); } /** @@ -1325,19 +1357,13 @@ phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t ali * @max_addr: the upper bound of the memory region to allocate (phys address) * @nid: nid of the free area to find, %NUMA_NO_NODE for any node * - * The @min_addr limit is dropped if it can not be satisfied and the allocation - * will fall back to memory below @min_addr. Also, allocation may fall back - * to any node in the system if the specified node can not - * hold the requested memory. - * - * The allocation is performed from memory region limited by - * memblock.current_limit if @max_addr == %MEMBLOCK_ALLOC_ACCESSIBLE. - * - * The phys address of allocated boot memory block is converted to virtual and - * allocated memory is reset to 0. + * Allocates memory block using memblock_alloc_range_nid() and + * converts the returned physical address to virtual. * - * In addition, function sets the min_count to 0 using kmemleak_alloc for - * allocated boot memory block, so that it is never reported as leaks. + * The @min_addr limit is dropped if it can not be satisfied and the allocation + * will fall back to memory below @min_addr. 
Other constraints, such + * as node and mirrored memory will be handled again in + * memblock_alloc_range_nid(). * * Return: * Virtual address of allocated memory block on success, NULL on failure. @@ -1348,11 +1374,6 @@ static void * __init memblock_alloc_internal( int nid) { phys_addr_t alloc; - void *ptr; - enum memblock_flags flags = choose_memblock_flags(); - - if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n")) - nid = NUMA_NO_NODE; /* * Detect any accidental use of these APIs after slab is ready, as at @@ -1362,54 +1383,16 @@ static void * __init memblock_alloc_internal( if (WARN_ON_ONCE(slab_is_available())) return kzalloc_node(size, GFP_NOWAIT, nid); - if (!align) { - dump_stack(); - align = SMP_CACHE_BYTES; - } - - if (max_addr > memblock.current_limit) - max_addr = memblock.current_limit; -again: - alloc = memblock_find_in_range_node(size, align, min_addr, max_addr, - nid, flags); - if (alloc && !memblock_reserve(alloc, size)) - goto done; - - if (nid != NUMA_NO_NODE) { - alloc = memblock_find_in_range_node(size, align, min_addr, - max_addr, NUMA_NO_NODE, - flags); - if (alloc && !memblock_reserve(alloc, size)) - goto done; - } - - if (min_addr) { - min_addr = 0; - goto again; - } - - if (flags & MEMBLOCK_MIRROR) { - flags &= ~MEMBLOCK_MIRROR; - pr_warn("Could not allocate %pap bytes of mirrored memory\n", - &size); - goto again; - } + alloc = memblock_alloc_range_nid(size, align, min_addr, max_addr, nid); - return NULL; -done: - ptr = phys_to_virt(alloc); + /* retry allocation without lower limit */ + if (!alloc && min_addr) + alloc = memblock_alloc_range_nid(size, align, 0, max_addr, nid); - /* Skip kmemleak for kasan_init() due to high volume. */ - if (max_addr != MEMBLOCK_ALLOC_KASAN) - /* - * The min_count is set to 0 so that bootmem allocated - * blocks are never reported as leaks. This is because many - * of these blocks are only referred via the physical - * address which is not looked up by kmemleak. - */ - kmemleak_alloc(ptr, size, 0, 0); + if (!alloc) + return NULL; - return ptr; + return phys_to_virt(alloc); } /** -- cgit v1.2.3-59-g8ed1b From c366ea89fa40f244d1210e74485fce110835b71b Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 11 Mar 2019 23:29:46 -0700 Subject: memblock: make memblock_find_in_range_node() and choose_memblock_flags() static These functions are not used outside memblock. Make them static. Link: http://lkml.kernel.org/r/1548057848-15136-12-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Cc: Catalin Marinas Cc: Christophe Leroy Cc: Christoph Hellwig Cc: "David S. Miller" Cc: Dennis Zhou Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Kroah-Hartman Cc: Guan Xuetao Cc: Guo Ren Cc: Guo Ren [c-sky] Cc: Heiko Carstens Cc: Juergen Gross [Xen] Cc: Mark Salter Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Paul Burton Cc: Petr Mladek Cc: Richard Weinberger Cc: Rich Felker Cc: Rob Herring Cc: Rob Herring Cc: Russell King Cc: Stafford Horne Cc: Tony Luck Cc: Vineet Gupta Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 4 ---- mm/memblock.c | 4 ++-- 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index c1315c331a8e..c077227e6d53 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -108,9 +108,6 @@ void memblock_discard(void); #define memblock_dbg(fmt, ...) 
\ if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) -phys_addr_t memblock_find_in_range_node(phys_addr_t size, phys_addr_t align, - phys_addr_t start, phys_addr_t end, - int nid, enum memblock_flags flags); phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end, phys_addr_t size, phys_addr_t align); void memblock_allow_resize(void); @@ -127,7 +124,6 @@ int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size); int memblock_mark_mirror(phys_addr_t base, phys_addr_t size); int memblock_mark_nomap(phys_addr_t base, phys_addr_t size); int memblock_clear_nomap(phys_addr_t base, phys_addr_t size); -enum memblock_flags choose_memblock_flags(void); unsigned long memblock_free_all(void); void reset_node_managed_pages(pg_data_t *pgdat); diff --git a/mm/memblock.c b/mm/memblock.c index 31e89dac9a23..618f94a1eedb 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -132,7 +132,7 @@ static int memblock_can_resize __initdata_memblock; static int memblock_memory_in_slab __initdata_memblock = 0; static int memblock_reserved_in_slab __initdata_memblock = 0; -enum memblock_flags __init_memblock choose_memblock_flags(void) +static enum memblock_flags __init_memblock choose_memblock_flags(void) { return system_has_some_mirror ? MEMBLOCK_MIRROR : MEMBLOCK_NONE; } @@ -261,7 +261,7 @@ __memblock_find_range_top_down(phys_addr_t start, phys_addr_t end, * Return: * Found address on success, 0 on failure. */ -phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size, +static phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size, phys_addr_t align, phys_addr_t start, phys_addr_t end, int nid, enum memblock_flags flags) -- cgit v1.2.3-59-g8ed1b From 26fb3dae0a1ec78bdde4b5b72e0e709503e8c596 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 11 Mar 2019 23:30:42 -0700 Subject: memblock: drop memblock_alloc_*_nopanic() variants As all the memblock allocation functions return NULL in case of error rather than panic(), the duplicates with _nopanic suffix can be removed. Link: http://lkml.kernel.org/r/1548057848-15136-22-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Acked-by: Greg Kroah-Hartman Reviewed-by: Petr Mladek [printk] Cc: Catalin Marinas Cc: Christophe Leroy Cc: Christoph Hellwig Cc: "David S. 
Miller" Cc: Dennis Zhou Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Guan Xuetao Cc: Guo Ren Cc: Guo Ren [c-sky] Cc: Heiko Carstens Cc: Juergen Gross [Xen] Cc: Mark Salter Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Paul Burton Cc: Richard Weinberger Cc: Rich Felker Cc: Rob Herring Cc: Rob Herring Cc: Russell King Cc: Stafford Horne Cc: Tony Luck Cc: Vineet Gupta Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arc/kernel/unwind.c | 3 +-- arch/sh/mm/init.c | 2 +- arch/x86/kernel/setup_percpu.c | 10 +++++----- arch/x86/mm/kasan_init_64.c | 14 ++++++++------ drivers/firmware/memmap.c | 2 +- drivers/usb/early/xhci-dbc.c | 2 +- include/linux/memblock.h | 35 ----------------------------------- kernel/dma/swiotlb.c | 2 +- kernel/printk/printk.c | 9 +-------- mm/memblock.c | 35 ----------------------------------- mm/page_alloc.c | 10 +++++----- mm/page_ext.c | 2 +- mm/percpu.c | 11 ++++------- mm/sparse.c | 6 ++---- 14 files changed, 31 insertions(+), 112 deletions(-) (limited to 'include') diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c index d34f69eb1a95..271e9fafa479 100644 --- a/arch/arc/kernel/unwind.c +++ b/arch/arc/kernel/unwind.c @@ -181,8 +181,7 @@ static void init_unwind_hdr(struct unwind_table *table, */ static void *__init unw_hdr_alloc_early(unsigned long sz) { - return memblock_alloc_from_nopanic(sz, sizeof(unsigned int), - MAX_DMA_ADDRESS); + return memblock_alloc_from(sz, sizeof(unsigned int), MAX_DMA_ADDRESS); } static void *unw_hdr_alloc(unsigned long sz) diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index fceefd92016f..70621324db41 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -202,7 +202,7 @@ void __init allocate_pgdat(unsigned int nid) get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); #ifdef CONFIG_NEED_MULTIPLE_NODES - NODE_DATA(nid) = memblock_alloc_try_nid_nopanic( + NODE_DATA(nid) = memblock_alloc_try_nid( sizeof(struct pglist_data), SMP_CACHE_BYTES, MEMBLOCK_LOW_LIMIT, MEMBLOCK_ALLOC_ACCESSIBLE, nid); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 13af08827eef..4bf46575568a 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -106,22 +106,22 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, void *ptr; if (!node_online(node) || !NODE_DATA(node)) { - ptr = memblock_alloc_from_nopanic(size, align, goal); + ptr = memblock_alloc_from(size, align, goal); pr_info("cpu %d has no node %d or node-local memory\n", cpu, node); pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n", cpu, size, __pa(ptr)); } else { - ptr = memblock_alloc_try_nid_nopanic(size, align, goal, - MEMBLOCK_ALLOC_ACCESSIBLE, - node); + ptr = memblock_alloc_try_nid(size, align, goal, + MEMBLOCK_ALLOC_ACCESSIBLE, + node); pr_debug("per cpu data for cpu%d %lu bytes on node%d at %016lx\n", cpu, size, node, __pa(ptr)); } return ptr; #else - return memblock_alloc_from_nopanic(size, align, goal); + return memblock_alloc_from(size, align, goal); #endif } diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 462fde83b515..8dc0fc0b1382 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -24,14 +24,16 @@ extern struct range pfn_mapped[E820_MAX_ENTRIES]; static p4d_t tmp_p4d_table[MAX_PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); -static __init void *early_alloc(size_t size, int nid, bool panic) +static __init void *early_alloc(size_t size, int nid, bool should_panic) { - if 
(panic) - return memblock_alloc_try_nid(size, size, - __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid); - else - return memblock_alloc_try_nid_nopanic(size, size, + void *ptr = memblock_alloc_try_nid(size, size, __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid); + + if (!ptr && should_panic) + panic("%pS: Failed to allocate page, nid=%d from=%lx\n", + (void *)_RET_IP_, nid, __pa(MAX_DMA_ADDRESS)); + + return ptr; } static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr, diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c index ec4fd253a4e9..d168c87c7d30 100644 --- a/drivers/firmware/memmap.c +++ b/drivers/firmware/memmap.c @@ -333,7 +333,7 @@ int __init firmware_map_add_early(u64 start, u64 end, const char *type) { struct firmware_map_entry *entry; - entry = memblock_alloc_nopanic(sizeof(struct firmware_map_entry), + entry = memblock_alloc(sizeof(struct firmware_map_entry), SMP_CACHE_BYTES); if (WARN_ON(!entry)) return -ENOMEM; diff --git a/drivers/usb/early/xhci-dbc.c b/drivers/usb/early/xhci-dbc.c index d2652dccc699..c9cfb100ecdc 100644 --- a/drivers/usb/early/xhci-dbc.c +++ b/drivers/usb/early/xhci-dbc.c @@ -94,7 +94,7 @@ static void * __init xdbc_get_page(dma_addr_t *dma_addr) { void *virt; - virt = memblock_alloc_nopanic(PAGE_SIZE, PAGE_SIZE); + virt = memblock_alloc(PAGE_SIZE, PAGE_SIZE); if (!virt) return NULL; diff --git a/include/linux/memblock.h b/include/linux/memblock.h index c077227e6d53..db69ad97aa2e 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -335,9 +335,6 @@ static inline phys_addr_t memblock_phys_alloc(phys_addr_t size, void *memblock_alloc_try_nid_raw(phys_addr_t size, phys_addr_t align, phys_addr_t min_addr, phys_addr_t max_addr, int nid); -void *memblock_alloc_try_nid_nopanic(phys_addr_t size, phys_addr_t align, - phys_addr_t min_addr, phys_addr_t max_addr, - int nid); void *memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, phys_addr_t min_addr, phys_addr_t max_addr, int nid); @@ -364,36 +361,12 @@ static inline void * __init memblock_alloc_from(phys_addr_t size, MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE); } -static inline void * __init memblock_alloc_nopanic(phys_addr_t size, - phys_addr_t align) -{ - return memblock_alloc_try_nid_nopanic(size, align, MEMBLOCK_LOW_LIMIT, - MEMBLOCK_ALLOC_ACCESSIBLE, - NUMA_NO_NODE); -} - static inline void * __init memblock_alloc_low(phys_addr_t size, phys_addr_t align) { return memblock_alloc_try_nid(size, align, MEMBLOCK_LOW_LIMIT, ARCH_LOW_ADDRESS_LIMIT, NUMA_NO_NODE); } -static inline void * __init memblock_alloc_low_nopanic(phys_addr_t size, - phys_addr_t align) -{ - return memblock_alloc_try_nid_nopanic(size, align, MEMBLOCK_LOW_LIMIT, - ARCH_LOW_ADDRESS_LIMIT, - NUMA_NO_NODE); -} - -static inline void * __init memblock_alloc_from_nopanic(phys_addr_t size, - phys_addr_t align, - phys_addr_t min_addr) -{ - return memblock_alloc_try_nid_nopanic(size, align, min_addr, - MEMBLOCK_ALLOC_ACCESSIBLE, - NUMA_NO_NODE); -} static inline void * __init memblock_alloc_node(phys_addr_t size, phys_addr_t align, int nid) @@ -402,14 +375,6 @@ static inline void * __init memblock_alloc_node(phys_addr_t size, MEMBLOCK_ALLOC_ACCESSIBLE, nid); } -static inline void * __init memblock_alloc_node_nopanic(phys_addr_t size, - int nid) -{ - return memblock_alloc_try_nid_nopanic(size, SMP_CACHE_BYTES, - MEMBLOCK_LOW_LIMIT, - MEMBLOCK_ALLOC_ACCESSIBLE, nid); -} - static inline void __init memblock_free_early(phys_addr_t base, phys_addr_t size) { diff --git a/kernel/dma/swiotlb.c 
b/kernel/dma/swiotlb.c index 56ac77a80b1f..53012db1e53c 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -256,7 +256,7 @@ swiotlb_init(int verbose) bytes = io_tlb_nslabs << IO_TLB_SHIFT; /* Get IO TLB memory from the low pages */ - vstart = memblock_alloc_low_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE); + vstart = memblock_alloc_low(PAGE_ALIGN(bytes), PAGE_SIZE); if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose)) return; diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 8eee85bb2687..6b7654b8001f 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1143,14 +1143,7 @@ void __init setup_log_buf(int early) if (!new_log_buf_len) return; - if (early) { - new_log_buf = - memblock_alloc(new_log_buf_len, LOG_ALIGN); - } else { - new_log_buf = memblock_alloc_nopanic(new_log_buf_len, - LOG_ALIGN); - } - + new_log_buf = memblock_alloc(new_log_buf_len, LOG_ALIGN); if (unlikely(!new_log_buf)) { pr_err("log_buf_len: %lu bytes not available\n", new_log_buf_len); diff --git a/mm/memblock.c b/mm/memblock.c index a838c50ca9a8..0ab30d0185bc 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1433,41 +1433,6 @@ void * __init memblock_alloc_try_nid_raw( return ptr; } -/** - * memblock_alloc_try_nid_nopanic - allocate boot memory block - * @size: size of memory block to be allocated in bytes - * @align: alignment of the region and block's size - * @min_addr: the lower bound of the memory region from where the allocation - * is preferred (phys address) - * @max_addr: the upper bound of the memory region from where the allocation - * is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to - * allocate only from memory limited by memblock.current_limit value - * @nid: nid of the free area to find, %NUMA_NO_NODE for any node - * - * Public function, provides additional debug information (including caller - * info), if enabled. This function zeroes the allocated memory. - * - * Return: - * Virtual address of allocated memory block on success, NULL on failure. 
- */ -void * __init memblock_alloc_try_nid_nopanic( - phys_addr_t size, phys_addr_t align, - phys_addr_t min_addr, phys_addr_t max_addr, - int nid) -{ - void *ptr; - - memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pF\n", - __func__, (u64)size, (u64)align, nid, &min_addr, - &max_addr, (void *)_RET_IP_); - - ptr = memblock_alloc_internal(size, align, - min_addr, max_addr, nid); - if (ptr) - memset(ptr, 0, size); - return ptr; -} - /** * memblock_alloc_try_nid - allocate boot memory block * @size: size of memory block to be allocated in bytes diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3eb01dedfb50..03fcf73d47da 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6445,8 +6445,8 @@ static void __ref setup_usemap(struct pglist_data *pgdat, zone->pageblock_flags = NULL; if (usemapsize) { zone->pageblock_flags = - memblock_alloc_node_nopanic(usemapsize, - pgdat->node_id); + memblock_alloc_node(usemapsize, SMP_CACHE_BYTES, + pgdat->node_id); if (!zone->pageblock_flags) panic("Failed to allocate %ld bytes for zone %s pageblock flags on node %d\n", usemapsize, zone->name, pgdat->node_id); @@ -6679,7 +6679,8 @@ static void __ref alloc_node_mem_map(struct pglist_data *pgdat) end = pgdat_end_pfn(pgdat); end = ALIGN(end, MAX_ORDER_NR_PAGES); size = (end - start) * sizeof(struct page); - map = memblock_alloc_node_nopanic(size, pgdat->node_id); + map = memblock_alloc_node(size, SMP_CACHE_BYTES, + pgdat->node_id); if (!map) panic("Failed to allocate %ld bytes for node %d memory map\n", size, pgdat->node_id); @@ -7959,8 +7960,7 @@ void *__init alloc_large_system_hash(const char *tablename, size = bucketsize << log2qty; if (flags & HASH_EARLY) { if (flags & HASH_ZERO) - table = memblock_alloc_nopanic(size, - SMP_CACHE_BYTES); + table = memblock_alloc(size, SMP_CACHE_BYTES); else table = memblock_alloc_raw(size, SMP_CACHE_BYTES); diff --git a/mm/page_ext.c b/mm/page_ext.c index ab4244920e0f..d8f1aca4ad43 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -161,7 +161,7 @@ static int __init alloc_node_page_ext(int nid) table_size = get_entry_size() * nr_pages; - base = memblock_alloc_try_nid_nopanic( + base = memblock_alloc_try_nid( table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid); if (!base) diff --git a/mm/percpu.c b/mm/percpu.c index 3f9fb3086a9b..2e6fc8d552c9 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1905,7 +1905,7 @@ struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups, __alignof__(ai->groups[0].cpu_map[0])); ai_size = base_size + nr_units * sizeof(ai->groups[0].cpu_map[0]); - ptr = memblock_alloc_nopanic(PFN_ALIGN(ai_size), PAGE_SIZE); + ptr = memblock_alloc(PFN_ALIGN(ai_size), PAGE_SIZE); if (!ptr) return NULL; ai = ptr; @@ -2496,7 +2496,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size, size_sum = ai->static_size + ai->reserved_size + ai->dyn_size; areas_size = PFN_ALIGN(ai->nr_groups * sizeof(void *)); - areas = memblock_alloc_nopanic(areas_size, SMP_CACHE_BYTES); + areas = memblock_alloc(areas_size, SMP_CACHE_BYTES); if (!areas) { rc = -ENOMEM; goto out_free; @@ -2729,8 +2729,7 @@ EXPORT_SYMBOL(__per_cpu_offset); static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size, size_t align) { - return memblock_alloc_from_nopanic( - size, align, __pa(MAX_DMA_ADDRESS)); + return memblock_alloc_from(size, align, __pa(MAX_DMA_ADDRESS)); } static void __init pcpu_dfl_fc_free(void *ptr, size_t size) @@ -2778,9 +2777,7 @@ void __init setup_per_cpu_areas(void) void *fc; ai = 
pcpu_alloc_alloc_info(1, 1); - fc = memblock_alloc_from_nopanic(unit_size, - PAGE_SIZE, - __pa(MAX_DMA_ADDRESS)); + fc = memblock_alloc_from(unit_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); if (!ai || !fc) panic("Failed to allocate memory for percpu areas."); /* kmemleak tracks the percpu allocations separately */ diff --git a/mm/sparse.c b/mm/sparse.c index 7397fb4e78b4..69904aa6165b 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -330,9 +330,7 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, limit = goal + (1UL << PA_SECTION_SHIFT); nid = early_pfn_to_nid(goal >> PAGE_SHIFT); again: - p = memblock_alloc_try_nid_nopanic(size, - SMP_CACHE_BYTES, goal, limit, - nid); + p = memblock_alloc_try_nid(size, SMP_CACHE_BYTES, goal, limit, nid); if (!p && limit) { limit = 0; goto again; @@ -386,7 +384,7 @@ static unsigned long * __init sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, unsigned long size) { - return memblock_alloc_node_nopanic(size, pgdat->node_id); + return memblock_alloc_node(size, SMP_CACHE_BYTES, pgdat->node_id); } static void __init check_usemap_section_nr(int nid, unsigned long *usemap) -- cgit v1.2.3-59-g8ed1b From fe145124dbe53c86bf32b941b2f2f88f891d985d Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 11 Mar 2019 23:30:46 -0700 Subject: memblock: remove memblock_{set,clear}_region_flags The memblock API provides dedicated helpers to set or clear a flag on a memory region, e.g. memblock_{mark,clear}_hotplug(). The memblock_{set,clear}_region_flags() functions are used only by the memblock internal function that adjusts the region flags. Drop these functions and use open-coded implementation instead. Link: http://lkml.kernel.org/r/1549455025-17706-2-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Reviewed-by: Andrew Morton Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 12 ------------ mm/memblock.c | 9 ++++++--- 2 files changed, 6 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index db69ad97aa2e..294d5d80e150 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -273,18 +273,6 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, for_each_mem_range_rev(i, &memblock.memory, &memblock.reserved, \ nid, flags, p_start, p_end, p_nid) -static inline void memblock_set_region_flags(struct memblock_region *r, - enum memblock_flags flags) -{ - r->flags |= flags; -} - -static inline void memblock_clear_region_flags(struct memblock_region *r, - enum memblock_flags flags) -{ - r->flags &= ~flags; -} - #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP int memblock_set_node(phys_addr_t base, phys_addr_t size, struct memblock_type *type, int nid); diff --git a/mm/memblock.c b/mm/memblock.c index 0ab30d0185bc..068e147695ee 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -858,11 +858,14 @@ static int __init_memblock memblock_setclr_flag(phys_addr_t base, if (ret) return ret; - for (i = start_rgn; i < end_rgn; i++) + for (i = start_rgn; i < end_rgn; i++) { + struct memblock_region *r = &type->regions[i]; + if (set) - memblock_set_region_flags(&type->regions[i], flag); + r->flags |= flag; else - memblock_clear_region_flags(&type->regions[i], flag); + r->flags &= ~flag; + } memblock_merge_regions(type); return 0; -- cgit v1.2.3-59-g8ed1b From ba20ba2e3743bac786dff777954c11930256075e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 11 Mar 2019 23:31:14 -0700 Subject: 
generic radix trees Very simple radix tree implementation that supports storing arbitrary size entries, up to PAGE_SIZE - upcoming patches will convert existing flex_array users to genradixes. The new genradix code has a much simpler API and implementation, and doesn't have a hard limit on the number of elements like flex_array does. Link: http://lkml.kernel.org/r/20181217131929.11727-5-kent.overstreet@gmail.com Signed-off-by: Kent Overstreet Cc: Alexey Dobriyan Cc: Al Viro Cc: Dave Hansen Cc: Eric Paris Cc: Marcelo Ricardo Leitner Cc: Matthew Wilcox Cc: Neil Horman Cc: Paul Moore Cc: Pravin B Shelar Cc: Shaohua Li Cc: Stephen Smalley Cc: Vlad Yasevich Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/core-api/generic-radix-tree.rst | 12 ++ Documentation/core-api/index.rst | 1 + include/linux/generic-radix-tree.h | 231 ++++++++++++++++++++++++++ lib/Makefile | 3 +- lib/generic-radix-tree.c | 217 ++++++++++++++++++++++++ 5 files changed, 463 insertions(+), 1 deletion(-) create mode 100644 Documentation/core-api/generic-radix-tree.rst create mode 100644 include/linux/generic-radix-tree.h create mode 100644 lib/generic-radix-tree.c (limited to 'include') diff --git a/Documentation/core-api/generic-radix-tree.rst b/Documentation/core-api/generic-radix-tree.rst new file mode 100644 index 000000000000..ed42839ae42f --- /dev/null +++ b/Documentation/core-api/generic-radix-tree.rst @@ -0,0 +1,12 @@ +================================= +Generic radix trees/sparse arrays +================================= + +.. kernel-doc:: include/linux/generic-radix-tree.h + :doc: Generic radix trees/sparse arrays + +generic radix tree functions +---------------------------- + +.. kernel-doc:: include/linux/generic-radix-tree.h + :functions: diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst index 3adee82be311..6870baffef82 100644 --- a/Documentation/core-api/index.rst +++ b/Documentation/core-api/index.rst @@ -28,6 +28,7 @@ Core utilities errseq printk-formats circular-buffers + generic-radix-tree memory-allocation mm-api gfp_mask-from-fs-io diff --git a/include/linux/generic-radix-tree.h b/include/linux/generic-radix-tree.h new file mode 100644 index 000000000000..3a91130a4fbd --- /dev/null +++ b/include/linux/generic-radix-tree.h @@ -0,0 +1,231 @@ +#ifndef _LINUX_GENERIC_RADIX_TREE_H +#define _LINUX_GENERIC_RADIX_TREE_H + +/** + * DOC: Generic radix trees/sparse arrays: + * + * Very simple and minimalistic, supporting arbitrary size entries up to + * PAGE_SIZE. + * + * A genradix is defined with the type it will store, like so: + * + * static GENRADIX(struct foo) foo_genradix; + * + * The main operations are: + * + * - genradix_init(radix) - initialize an empty genradix + * + * - genradix_free(radix) - free all memory owned by the genradix and + * reinitialize it + * + * - genradix_ptr(radix, idx) - gets a pointer to the entry at idx, returning + * NULL if that entry does not exist + * + * - genradix_ptr_alloc(radix, idx, gfp) - gets a pointer to an entry, + * allocating it if necessary + * + * - genradix_for_each(radix, iter, p) - iterate over each entry in a genradix + * + * The radix tree allocates one page of entries at a time, so entries may exist + * that were never explicitly allocated - they will be initialized to all + * zeroes. + * + * Internally, a genradix is just a radix tree of pages, and indexing works in + * terms of byte offsets. 
The wrappers in this header file use sizeof on the + * type the radix contains to calculate a byte offset from the index - see + * __idx_to_offset. + */ + +#include +#include +#include +#include + +struct genradix_root; + +struct __genradix { + struct genradix_root __rcu *root; +}; + +/* + * NOTE: currently, sizeof(_type) must not be larger than PAGE_SIZE: + */ + +#define __GENRADIX_INITIALIZER \ + { \ + .tree = { \ + .root = NULL, \ + } \ + } + +/* + * We use a 0 size array to stash the type we're storing without taking any + * space at runtime - then the various accessor macros can use typeof() to get + * to it for casts/sizeof - we also force the alignment so that storing a type + * with a ridiculous alignment doesn't blow up the alignment or size of the + * genradix. + */ + +#define GENRADIX(_type) \ +struct { \ + struct __genradix tree; \ + _type type[0] __aligned(1); \ +} + +#define DEFINE_GENRADIX(_name, _type) \ + GENRADIX(_type) _name = __GENRADIX_INITIALIZER + +/** + * genradix_init - initialize a genradix + * @_radix: genradix to initialize + * + * Does not fail + */ +#define genradix_init(_radix) \ +do { \ + *(_radix) = (typeof(*_radix)) __GENRADIX_INITIALIZER; \ +} while (0) + +void __genradix_free(struct __genradix *); + +/** + * genradix_free: free all memory owned by a genradix + * @_radix: the genradix to free + * + * After freeing, @_radix will be reinitialized and empty + */ +#define genradix_free(_radix) __genradix_free(&(_radix)->tree) + +static inline size_t __idx_to_offset(size_t idx, size_t obj_size) +{ + if (__builtin_constant_p(obj_size)) + BUILD_BUG_ON(obj_size > PAGE_SIZE); + else + BUG_ON(obj_size > PAGE_SIZE); + + if (!is_power_of_2(obj_size)) { + size_t objs_per_page = PAGE_SIZE / obj_size; + + return (idx / objs_per_page) * PAGE_SIZE + + (idx % objs_per_page) * obj_size; + } else { + return idx * obj_size; + } +} + +#define __genradix_cast(_radix) (typeof((_radix)->type[0]) *) +#define __genradix_obj_size(_radix) sizeof((_radix)->type[0]) +#define __genradix_idx_to_offset(_radix, _idx) \ + __idx_to_offset(_idx, __genradix_obj_size(_radix)) + +void *__genradix_ptr(struct __genradix *, size_t); + +/** + * genradix_ptr - get a pointer to a genradix entry + * @_radix: genradix to access + * @_idx: index to fetch + * + * Returns a pointer to entry at @_idx, or NULL if that entry does not exist. 
+ */ +#define genradix_ptr(_radix, _idx) \ + (__genradix_cast(_radix) \ + __genradix_ptr(&(_radix)->tree, \ + __genradix_idx_to_offset(_radix, _idx))) + +void *__genradix_ptr_alloc(struct __genradix *, size_t, gfp_t); + +/** + * genradix_ptr_alloc - get a pointer to a genradix entry, allocating it + * if necessary + * @_radix: genradix to access + * @_idx: index to fetch + * @_gfp: gfp mask + * + * Returns a pointer to entry at @_idx, or NULL on allocation failure + */ +#define genradix_ptr_alloc(_radix, _idx, _gfp) \ + (__genradix_cast(_radix) \ + __genradix_ptr_alloc(&(_radix)->tree, \ + __genradix_idx_to_offset(_radix, _idx), \ + _gfp)) + +struct genradix_iter { + size_t offset; + size_t pos; +}; + +/** + * genradix_iter_init - initialize a genradix_iter + * @_radix: genradix that will be iterated over + * @_idx: index to start iterating from + */ +#define genradix_iter_init(_radix, _idx) \ + ((struct genradix_iter) { \ + .pos = (_idx), \ + .offset = __genradix_idx_to_offset((_radix), (_idx)),\ + }) + +void *__genradix_iter_peek(struct genradix_iter *, struct __genradix *, size_t); + +/** + * genradix_iter_peek - get first entry at or above iterator's current + * position + * @_iter: a genradix_iter + * @_radix: genradix being iterated over + * + * If no more entries exist at or above @_iter's current position, returns NULL + */ +#define genradix_iter_peek(_iter, _radix) \ + (__genradix_cast(_radix) \ + __genradix_iter_peek(_iter, &(_radix)->tree, \ + PAGE_SIZE / __genradix_obj_size(_radix))) + +static inline void __genradix_iter_advance(struct genradix_iter *iter, + size_t obj_size) +{ + iter->offset += obj_size; + + if (!is_power_of_2(obj_size) && + (iter->offset & (PAGE_SIZE - 1)) + obj_size > PAGE_SIZE) + iter->offset = round_up(iter->offset, PAGE_SIZE); + + iter->pos++; +} + +#define genradix_iter_advance(_iter, _radix) \ + __genradix_iter_advance(_iter, __genradix_obj_size(_radix)) + +#define genradix_for_each_from(_radix, _iter, _p, _start) \ + for (_iter = genradix_iter_init(_radix, _start); \ + (_p = genradix_iter_peek(&_iter, _radix)) != NULL; \ + genradix_iter_advance(&_iter, _radix)) + +/** + * genradix_for_each - iterate over entry in a genradix + * @_radix: genradix to iterate over + * @_iter: a genradix_iter to track current position + * @_p: pointer to genradix entry type + * + * On every iteration, @_p will point to the current entry, and @_iter.pos + * will be the current entry's index. 
+ */ +#define genradix_for_each(_radix, _iter, _p) \ + genradix_for_each_from(_radix, _iter, _p, 0) + +int __genradix_prealloc(struct __genradix *, size_t, gfp_t); + +/** + * genradix_prealloc - preallocate entries in a generic radix tree + * @_radix: genradix to preallocate + * @_nr: number of entries to preallocate + * @_gfp: gfp mask + * + * Returns 0 on success, -ENOMEM on failure + */ +#define genradix_prealloc(_radix, _nr, _gfp) \ + __genradix_prealloc(&(_radix)->tree, \ + __genradix_idx_to_offset(_radix, _nr + 1),\ + _gfp) + + +#endif /* _LINUX_GENERIC_RADIX_TREE_H */ diff --git a/lib/Makefile b/lib/Makefile index 647517940b29..b798b41d01ae 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -38,7 +38,8 @@ obj-y += bcd.o div64.o sort.o parser.o debug_locks.o random32.o \ gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \ bsearch.o find_bit.o llist.o memweight.o kfifo.o \ percpu-refcount.o rhashtable.o reciprocal_div.o \ - once.o refcount.o usercopy.o errseq.o bucket_locks.o + once.o refcount.o usercopy.o errseq.o bucket_locks.o \ + generic-radix-tree.o obj-$(CONFIG_STRING_SELFTEST) += test_string.o obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o diff --git a/lib/generic-radix-tree.c b/lib/generic-radix-tree.c new file mode 100644 index 000000000000..a7bafc413730 --- /dev/null +++ b/lib/generic-radix-tree.c @@ -0,0 +1,217 @@ + +#include +#include +#include + +#define GENRADIX_ARY (PAGE_SIZE / sizeof(struct genradix_node *)) +#define GENRADIX_ARY_SHIFT ilog2(GENRADIX_ARY) + +struct genradix_node { + union { + /* Interior node: */ + struct genradix_node *children[GENRADIX_ARY]; + + /* Leaf: */ + u8 data[PAGE_SIZE]; + }; +}; + +static inline int genradix_depth_shift(unsigned depth) +{ + return PAGE_SHIFT + GENRADIX_ARY_SHIFT * depth; +} + +/* + * Returns size (of data, in bytes) that a tree of a given depth holds: + */ +static inline size_t genradix_depth_size(unsigned depth) +{ + return 1UL << genradix_depth_shift(depth); +} + +/* depth that's needed for a genradix that can address up to ULONG_MAX: */ +#define GENRADIX_MAX_DEPTH \ + DIV_ROUND_UP(BITS_PER_LONG - PAGE_SHIFT, GENRADIX_ARY_SHIFT) + +#define GENRADIX_DEPTH_MASK \ + ((unsigned long) (roundup_pow_of_two(GENRADIX_MAX_DEPTH + 1) - 1)) + +unsigned genradix_root_to_depth(struct genradix_root *r) +{ + return (unsigned long) r & GENRADIX_DEPTH_MASK; +} + +struct genradix_node *genradix_root_to_node(struct genradix_root *r) +{ + return (void *) ((unsigned long) r & ~GENRADIX_DEPTH_MASK); +} + +/* + * Returns pointer to the specified byte @offset within @radix, or NULL if not + * allocated + */ +void *__genradix_ptr(struct __genradix *radix, size_t offset) +{ + struct genradix_root *r = READ_ONCE(radix->root); + struct genradix_node *n = genradix_root_to_node(r); + unsigned level = genradix_root_to_depth(r); + + if (ilog2(offset) >= genradix_depth_shift(level)) + return NULL; + + while (1) { + if (!n) + return NULL; + if (!level) + break; + + level--; + + n = n->children[offset >> genradix_depth_shift(level)]; + offset &= genradix_depth_size(level) - 1; + } + + return &n->data[offset]; +} +EXPORT_SYMBOL(__genradix_ptr); + +/* + * Returns pointer to the specified byte @offset within @radix, allocating it if + * necessary - newly allocated slots are always zeroed out: + */ +void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset, + gfp_t gfp_mask) +{ + struct genradix_root *v = READ_ONCE(radix->root); + struct genradix_node *n, *new_node = NULL; + unsigned level; + + /* 
Increase tree depth if necessary: */ + while (1) { + struct genradix_root *r = v, *new_root; + + n = genradix_root_to_node(r); + level = genradix_root_to_depth(r); + + if (n && ilog2(offset) < genradix_depth_shift(level)) + break; + + if (!new_node) { + new_node = (void *) + __get_free_page(gfp_mask|__GFP_ZERO); + if (!new_node) + return NULL; + } + + new_node->children[0] = n; + new_root = ((struct genradix_root *) + ((unsigned long) new_node | (n ? level + 1 : 0))); + + if ((v = cmpxchg_release(&radix->root, r, new_root)) == r) { + v = new_root; + new_node = NULL; + } + } + + while (level--) { + struct genradix_node **p = + &n->children[offset >> genradix_depth_shift(level)]; + offset &= genradix_depth_size(level) - 1; + + n = READ_ONCE(*p); + if (!n) { + if (!new_node) { + new_node = (void *) + __get_free_page(gfp_mask|__GFP_ZERO); + if (!new_node) + return NULL; + } + + if (!(n = cmpxchg_release(p, NULL, new_node))) + swap(n, new_node); + } + } + + if (new_node) + free_page((unsigned long) new_node); + + return &n->data[offset]; +} +EXPORT_SYMBOL(__genradix_ptr_alloc); + +void *__genradix_iter_peek(struct genradix_iter *iter, + struct __genradix *radix, + size_t objs_per_page) +{ + struct genradix_root *r; + struct genradix_node *n; + unsigned level, i; +restart: + r = READ_ONCE(radix->root); + if (!r) + return NULL; + + n = genradix_root_to_node(r); + level = genradix_root_to_depth(r); + + if (ilog2(iter->offset) >= genradix_depth_shift(level)) + return NULL; + + while (level) { + level--; + + i = (iter->offset >> genradix_depth_shift(level)) & + (GENRADIX_ARY - 1); + + while (!n->children[i]) { + i++; + iter->offset = round_down(iter->offset + + genradix_depth_size(level), + genradix_depth_size(level)); + iter->pos = (iter->offset >> PAGE_SHIFT) * + objs_per_page; + if (i == GENRADIX_ARY) + goto restart; + } + + n = n->children[i]; + } + + return &n->data[iter->offset & (PAGE_SIZE - 1)]; +} +EXPORT_SYMBOL(__genradix_iter_peek); + +static void genradix_free_recurse(struct genradix_node *n, unsigned level) +{ + if (level) { + unsigned i; + + for (i = 0; i < GENRADIX_ARY; i++) + if (n->children[i]) + genradix_free_recurse(n->children[i], level - 1); + } + + free_page((unsigned long) n); +} + +int __genradix_prealloc(struct __genradix *radix, size_t size, + gfp_t gfp_mask) +{ + size_t offset; + + for (offset = 0; offset < size; offset += PAGE_SIZE) + if (!__genradix_ptr_alloc(radix, offset, gfp_mask)) + return -ENOMEM; + + return 0; +} +EXPORT_SYMBOL(__genradix_prealloc); + +void __genradix_free(struct __genradix *radix) +{ + struct genradix_root *r = xchg(&radix->root, NULL); + + genradix_free_recurse(genradix_root_to_node(r), + genradix_root_to_depth(r)); +} +EXPORT_SYMBOL(__genradix_free); -- cgit v1.2.3-59-g8ed1b From 2075e50caf5ea28be3cba0d01b3058bb5c3b0168 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 11 Mar 2019 23:31:22 -0700 Subject: sctp: convert to genradix This also makes sctp_stream_alloc_(out|in) saner, in that they no longer allocate new flex_arrays/genradixes, they just preallocate more elements. This code does however have a suspicious lack of locking. 
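As an illustration of the conversion pattern (a sketch only, not code from this patch; the foo_* names are hypothetical, and only the GENRADIX()/genradix_*() calls are the real API added earlier in this series):

	/*
	 * Hypothetical sketch of the flex_array -> genradix conversion pattern.
	 * Assumes the embedding structure starts out zeroed (as the sctp code
	 * does), which is equivalent to genradix_init().
	 */
	#include <linux/generic-radix-tree.h>
	#include <linux/types.h>

	struct foo_entry {
		u32 state;
	};

	struct foo_table {
		GENRADIX(struct foo_entry) entries;	/* was: struct flex_array *entries */
		u16 count;
	};

	static int foo_table_grow(struct foo_table *t, u16 count, gfp_t gfp)
	{
		int ret;

		if (count <= t->count)
			return 0;

		/* Preallocate in place; existing entries keep their contents. */
		ret = genradix_prealloc(&t->entries, count, gfp);
		if (ret)
			return ret;

		t->count = count;
		return 0;
	}

	static struct foo_entry *foo_table_entry(struct foo_table *t, u16 id)
	{
		return genradix_ptr(&t->entries, id);	/* was: flex_array_get() */
	}

	static void foo_table_free(struct foo_table *t)
	{
		genradix_free(&t->entries);		/* was: flex_array_free() */
	}

Unlike the flex_array version, growing the table never reallocates or copies the backing storage, so pointers into existing entries (such as out_curr) stay valid across a resize.
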
Link: http://lkml.kernel.org/r/20181217131929.11727-7-kent.overstreet@gmail.com Signed-off-by: Kent Overstreet Cc: Vlad Yasevich Cc: Neil Horman Cc: Marcelo Ricardo Leitner Cc: Alexey Dobriyan Cc: Al Viro Cc: Dave Hansen Cc: Eric Paris Cc: Matthew Wilcox Cc: Paul Moore Cc: Pravin B Shelar Cc: Shaohua Li Cc: Stephen Smalley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/net/sctp/structs.h | 15 +++--- net/sctp/stream.c | 125 +++++++------------------------------------ net/sctp/stream_interleave.c | 2 +- 3 files changed, 28 insertions(+), 114 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 58e4b23cecf4..140fd836a396 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -48,6 +48,7 @@ #define __sctp_structs_h__ #include +#include #include #include /* linux/in.h needs this!! */ #include /* We get struct sockaddr_in. */ @@ -57,7 +58,6 @@ #include /* This gets us atomic counters. */ #include /* We need sk_buff_head. */ #include /* We need tq_struct. */ -#include /* We need flex_array. */ #include /* We need sctp* header structs. */ #include /* We need auth specific structs */ #include /* For inet_skb_parm */ @@ -1449,8 +1449,9 @@ struct sctp_stream_in { }; struct sctp_stream { - struct flex_array *out; - struct flex_array *in; + GENRADIX(struct sctp_stream_out) out; + GENRADIX(struct sctp_stream_in) in; + __u16 outcnt; __u16 incnt; /* Current stream being sent, if any */ @@ -1473,17 +1474,17 @@ struct sctp_stream { }; static inline struct sctp_stream_out *sctp_stream_out( - const struct sctp_stream *stream, + struct sctp_stream *stream, __u16 sid) { - return flex_array_get(stream->out, sid); + return genradix_ptr(&stream->out, sid); } static inline struct sctp_stream_in *sctp_stream_in( - const struct sctp_stream *stream, + struct sctp_stream *stream, __u16 sid) { - return flex_array_get(stream->in, sid); + return genradix_ptr(&stream->in, sid); } #define SCTP_SO(s, i) sctp_stream_out((s), (i)) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 3b47457862cc..b6bb68adac6e 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -37,66 +37,6 @@ #include #include -static struct flex_array *fa_alloc(size_t elem_size, size_t elem_count, - gfp_t gfp) -{ - struct flex_array *result; - int err; - - result = flex_array_alloc(elem_size, elem_count, gfp); - if (result) { - err = flex_array_prealloc(result, 0, elem_count, gfp); - if (err) { - flex_array_free(result); - result = NULL; - } - } - - return result; -} - -static void fa_free(struct flex_array *fa) -{ - if (fa) - flex_array_free(fa); -} - -static void fa_copy(struct flex_array *fa, struct flex_array *from, - size_t index, size_t count) -{ - void *elem; - - while (count--) { - elem = flex_array_get(from, index); - flex_array_put(fa, index, elem, 0); - index++; - } -} - -static void fa_zero(struct flex_array *fa, size_t index, size_t count) -{ - void *elem; - - while (count--) { - elem = flex_array_get(fa, index); - memset(elem, 0, fa->element_size); - index++; - } -} - -static size_t fa_index(struct flex_array *fa, void *elem, size_t count) -{ - size_t index = 0; - - while (count--) { - if (elem == flex_array_get(fa, index)) - break; - index++; - } - - return index; -} - /* Migrates chunks from stream queues to new stream queues if needed, * but not across associations. Also, removes those chunks to streams * higher than the new max. 
@@ -153,53 +93,32 @@ static void sctp_stream_outq_migrate(struct sctp_stream *stream, static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt, gfp_t gfp) { - struct flex_array *out; - size_t elem_size = sizeof(struct sctp_stream_out); - - out = fa_alloc(elem_size, outcnt, gfp); - if (!out) - return -ENOMEM; - - if (stream->out) { - fa_copy(out, stream->out, 0, min(outcnt, stream->outcnt)); - if (stream->out_curr) { - size_t index = fa_index(stream->out, stream->out_curr, - stream->outcnt); - - BUG_ON(index == stream->outcnt); - stream->out_curr = flex_array_get(out, index); - } - fa_free(stream->out); - } + int ret; - if (outcnt > stream->outcnt) - fa_zero(out, stream->outcnt, (outcnt - stream->outcnt)); + if (outcnt <= stream->outcnt) + return 0; - stream->out = out; + ret = genradix_prealloc(&stream->out, outcnt, gfp); + if (ret) + return ret; + stream->outcnt = outcnt; return 0; } static int sctp_stream_alloc_in(struct sctp_stream *stream, __u16 incnt, gfp_t gfp) { - struct flex_array *in; - size_t elem_size = sizeof(struct sctp_stream_in); - - in = fa_alloc(elem_size, incnt, gfp); - if (!in) - return -ENOMEM; - - if (stream->in) { - fa_copy(in, stream->in, 0, min(incnt, stream->incnt)); - fa_free(stream->in); - } + int ret; - if (incnt > stream->incnt) - fa_zero(in, stream->incnt, (incnt - stream->incnt)); + if (incnt <= stream->incnt) + return 0; - stream->in = in; + ret = genradix_prealloc(&stream->in, incnt, gfp); + if (ret) + return ret; + stream->incnt = incnt; return 0; } @@ -226,7 +145,6 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, if (ret) goto out; - stream->outcnt = outcnt; for (i = 0; i < stream->outcnt; i++) SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN; @@ -238,14 +156,11 @@ in: ret = sctp_stream_alloc_in(stream, incnt, gfp); if (ret) { sched->free(stream); - fa_free(stream->out); - stream->out = NULL; + genradix_free(&stream->out); stream->outcnt = 0; goto out; } - stream->incnt = incnt; - out: return ret; } @@ -270,8 +185,8 @@ void sctp_stream_free(struct sctp_stream *stream) sched->free(stream); for (i = 0; i < stream->outcnt; i++) kfree(SCTP_SO(stream, i)->ext); - fa_free(stream->out); - fa_free(stream->in); + genradix_free(&stream->out); + genradix_free(&stream->in); } void sctp_stream_clear(struct sctp_stream *stream) @@ -302,8 +217,8 @@ void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new) sched->sched_all(stream); - new->out = NULL; - new->in = NULL; + new->out.tree.root = NULL; + new->in.tree.root = NULL; new->outcnt = 0; new->incnt = 0; } @@ -555,8 +470,6 @@ int sctp_send_add_streams(struct sctp_association *asoc, goto out; } - stream->outcnt = outcnt; - asoc->strreset_outstanding = !!out + !!in; out: diff --git a/net/sctp/stream_interleave.c b/net/sctp/stream_interleave.c index a6bf21579466..102c6fefe38c 100644 --- a/net/sctp/stream_interleave.c +++ b/net/sctp/stream_interleave.c @@ -101,7 +101,7 @@ static void sctp_chunk_assign_mid(struct sctp_chunk *chunk) static bool sctp_validate_data(struct sctp_chunk *chunk) { - const struct sctp_stream *stream; + struct sctp_stream *stream; __u16 sid, ssn; if (chunk->chunk_hdr->type != SCTP_CID_DATA) -- cgit v1.2.3-59-g8ed1b From 586187d7de71b4da7956ba588ae42253b9ff6482 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 11 Mar 2019 23:31:26 -0700 Subject: Drop flex_arrays All existing users have been converted to generic radix trees Link: http://lkml.kernel.org/r/20181217131929.11727-8-kent.overstreet@gmail.com Signed-off-by: Kent Overstreet 
Acked-by: Dave Hansen Cc: Alexey Dobriyan Cc: Al Viro Cc: Eric Paris Cc: Marcelo Ricardo Leitner Cc: Matthew Wilcox Cc: Neil Horman Cc: Paul Moore Cc: Pravin B Shelar Cc: Shaohua Li Cc: Stephen Smalley Cc: Vlad Yasevich Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/core-api/flexible-arrays.rst | 130 ---------- Documentation/flexible-arrays.txt | 123 --------- include/linux/flex_array.h | 149 ----------- include/linux/poison.h | 3 - lib/Makefile | 2 +- lib/flex_array.c | 398 ----------------------------- tools/include/linux/poison.h | 3 - 7 files changed, 1 insertion(+), 807 deletions(-) delete mode 100644 Documentation/core-api/flexible-arrays.rst delete mode 100644 Documentation/flexible-arrays.txt delete mode 100644 include/linux/flex_array.h delete mode 100644 lib/flex_array.c (limited to 'include') diff --git a/Documentation/core-api/flexible-arrays.rst b/Documentation/core-api/flexible-arrays.rst deleted file mode 100644 index b6b85a1b518e..000000000000 --- a/Documentation/core-api/flexible-arrays.rst +++ /dev/null @@ -1,130 +0,0 @@ - -=================================== -Using flexible arrays in the kernel -=================================== - -Large contiguous memory allocations can be unreliable in the Linux kernel. -Kernel programmers will sometimes respond to this problem by allocating -pages with :c:func:`vmalloc()`. This solution not ideal, though. On 32-bit -systems, memory from vmalloc() must be mapped into a relatively small address -space; it's easy to run out. On SMP systems, the page table changes required -by vmalloc() allocations can require expensive cross-processor interrupts on -all CPUs. And, on all systems, use of space in the vmalloc() range increases -pressure on the translation lookaside buffer (TLB), reducing the performance -of the system. - -In many cases, the need for memory from vmalloc() can be eliminated by piecing -together an array from smaller parts; the flexible array library exists to make -this task easier. - -A flexible array holds an arbitrary (within limits) number of fixed-sized -objects, accessed via an integer index. Sparse arrays are handled -reasonably well. Only single-page allocations are made, so memory -allocation failures should be relatively rare. The down sides are that the -arrays cannot be indexed directly, individual object size cannot exceed the -system page size, and putting data into a flexible array requires a copy -operation. It's also worth noting that flexible arrays do no internal -locking at all; if concurrent access to an array is possible, then the -caller must arrange for appropriate mutual exclusion. - -The creation of a flexible array is done with :c:func:`flex_array_alloc()`:: - - #include - - struct flex_array *flex_array_alloc(int element_size, - unsigned int total, - gfp_t flags); - -The individual object size is provided by ``element_size``, while total is the -maximum number of objects which can be stored in the array. The flags -argument is passed directly to the internal memory allocation calls. With -the current code, using flags to ask for high memory is likely to lead to -notably unpleasant side effects. - -It is also possible to define flexible arrays at compile time with:: - - DEFINE_FLEX_ARRAY(name, element_size, total); - -This macro will result in a definition of an array with the given name; the -element size and total will be checked for validity at compile time. 
- -Storing data into a flexible array is accomplished with a call to -:c:func:`flex_array_put()`:: - - int flex_array_put(struct flex_array *array, unsigned int element_nr, - void *src, gfp_t flags); - -This call will copy the data from src into the array, in the position -indicated by ``element_nr`` (which must be less than the maximum specified when -the array was created). If any memory allocations must be performed, flags -will be used. The return value is zero on success, a negative error code -otherwise. - -There might possibly be a need to store data into a flexible array while -running in some sort of atomic context; in this situation, sleeping in the -memory allocator would be a bad thing. That can be avoided by using -``GFP_ATOMIC`` for the flags value, but, often, there is a better way. The -trick is to ensure that any needed memory allocations are done before -entering atomic context, using :c:func:`flex_array_prealloc()`:: - - int flex_array_prealloc(struct flex_array *array, unsigned int start, - unsigned int nr_elements, gfp_t flags); - -This function will ensure that memory for the elements indexed in the range -defined by ``start`` and ``nr_elements`` has been allocated. Thereafter, a -``flex_array_put()`` call on an element in that range is guaranteed not to -block. - -Getting data back out of the array is done with :c:func:`flex_array_get()`:: - - void *flex_array_get(struct flex_array *fa, unsigned int element_nr); - -The return value is a pointer to the data element, or NULL if that -particular element has never been allocated. - -Note that it is possible to get back a valid pointer for an element which -has never been stored in the array. Memory for array elements is allocated -one page at a time; a single allocation could provide memory for several -adjacent elements. Flexible array elements are normally initialized to the -value ``FLEX_ARRAY_FREE`` (defined as 0x6c in ), so errors -involving that number probably result from use of unstored array entries. -Note that, if array elements are allocated with ``__GFP_ZERO``, they will be -initialized to zero and this poisoning will not happen. - -Individual elements in the array can be cleared with -:c:func:`flex_array_clear()`:: - - int flex_array_clear(struct flex_array *array, unsigned int element_nr); - -This function will set the given element to ``FLEX_ARRAY_FREE`` and return -zero. If storage for the indicated element is not allocated for the array, -``flex_array_clear()`` will return ``-EINVAL`` instead. Note that clearing an -element does not release the storage associated with it; to reduce the -allocated size of an array, call :c:func:`flex_array_shrink()`:: - - int flex_array_shrink(struct flex_array *array); - -The return value will be the number of pages of memory actually freed. -This function works by scanning the array for pages containing nothing but -``FLEX_ARRAY_FREE`` bytes, so (1) it can be expensive, and (2) it will not work -if the array's pages are allocated with ``__GFP_ZERO``. - -It is possible to remove all elements of an array with a call to -:c:func:`flex_array_free_parts()`:: - - void flex_array_free_parts(struct flex_array *array); - -This call frees all elements, but leaves the array itself in place. -Freeing the entire array is done with :c:func:`flex_array_free()`:: - - void flex_array_free(struct flex_array *array); - -As of this writing, there are no users of flexible arrays in the mainline -kernel. 
The functions described here are also not exported to modules; -that will probably be fixed when somebody comes up with a need for it. - - -Flexible array functions ------------------------- - -.. kernel-doc:: include/linux/flex_array.h diff --git a/Documentation/flexible-arrays.txt b/Documentation/flexible-arrays.txt deleted file mode 100644 index a0f2989dd804..000000000000 --- a/Documentation/flexible-arrays.txt +++ /dev/null @@ -1,123 +0,0 @@ -=================================== -Using flexible arrays in the kernel -=================================== - -:Updated: Last updated for 2.6.32 -:Author: Jonathan Corbet - -Large contiguous memory allocations can be unreliable in the Linux kernel. -Kernel programmers will sometimes respond to this problem by allocating -pages with vmalloc(). This solution not ideal, though. On 32-bit systems, -memory from vmalloc() must be mapped into a relatively small address space; -it's easy to run out. On SMP systems, the page table changes required by -vmalloc() allocations can require expensive cross-processor interrupts on -all CPUs. And, on all systems, use of space in the vmalloc() range -increases pressure on the translation lookaside buffer (TLB), reducing the -performance of the system. - -In many cases, the need for memory from vmalloc() can be eliminated by -piecing together an array from smaller parts; the flexible array library -exists to make this task easier. - -A flexible array holds an arbitrary (within limits) number of fixed-sized -objects, accessed via an integer index. Sparse arrays are handled -reasonably well. Only single-page allocations are made, so memory -allocation failures should be relatively rare. The down sides are that the -arrays cannot be indexed directly, individual object size cannot exceed the -system page size, and putting data into a flexible array requires a copy -operation. It's also worth noting that flexible arrays do no internal -locking at all; if concurrent access to an array is possible, then the -caller must arrange for appropriate mutual exclusion. - -The creation of a flexible array is done with:: - - #include - - struct flex_array *flex_array_alloc(int element_size, - unsigned int total, - gfp_t flags); - -The individual object size is provided by element_size, while total is the -maximum number of objects which can be stored in the array. The flags -argument is passed directly to the internal memory allocation calls. With -the current code, using flags to ask for high memory is likely to lead to -notably unpleasant side effects. - -It is also possible to define flexible arrays at compile time with:: - - DEFINE_FLEX_ARRAY(name, element_size, total); - -This macro will result in a definition of an array with the given name; the -element size and total will be checked for validity at compile time. - -Storing data into a flexible array is accomplished with a call to:: - - int flex_array_put(struct flex_array *array, unsigned int element_nr, - void *src, gfp_t flags); - -This call will copy the data from src into the array, in the position -indicated by element_nr (which must be less than the maximum specified when -the array was created). If any memory allocations must be performed, flags -will be used. The return value is zero on success, a negative error code -otherwise. - -There might possibly be a need to store data into a flexible array while -running in some sort of atomic context; in this situation, sleeping in the -memory allocator would be a bad thing. 
That can be avoided by using -GFP_ATOMIC for the flags value, but, often, there is a better way. The -trick is to ensure that any needed memory allocations are done before -entering atomic context, using:: - - int flex_array_prealloc(struct flex_array *array, unsigned int start, - unsigned int nr_elements, gfp_t flags); - -This function will ensure that memory for the elements indexed in the range -defined by start and nr_elements has been allocated. Thereafter, a -flex_array_put() call on an element in that range is guaranteed not to -block. - -Getting data back out of the array is done with:: - - void *flex_array_get(struct flex_array *fa, unsigned int element_nr); - -The return value is a pointer to the data element, or NULL if that -particular element has never been allocated. - -Note that it is possible to get back a valid pointer for an element which -has never been stored in the array. Memory for array elements is allocated -one page at a time; a single allocation could provide memory for several -adjacent elements. Flexible array elements are normally initialized to the -value FLEX_ARRAY_FREE (defined as 0x6c in ), so errors -involving that number probably result from use of unstored array entries. -Note that, if array elements are allocated with __GFP_ZERO, they will be -initialized to zero and this poisoning will not happen. - -Individual elements in the array can be cleared with:: - - int flex_array_clear(struct flex_array *array, unsigned int element_nr); - -This function will set the given element to FLEX_ARRAY_FREE and return -zero. If storage for the indicated element is not allocated for the array, -flex_array_clear() will return -EINVAL instead. Note that clearing an -element does not release the storage associated with it; to reduce the -allocated size of an array, call:: - - int flex_array_shrink(struct flex_array *array); - -The return value will be the number of pages of memory actually freed. -This function works by scanning the array for pages containing nothing but -FLEX_ARRAY_FREE bytes, so (1) it can be expensive, and (2) it will not work -if the array's pages are allocated with __GFP_ZERO. - -It is possible to remove all elements of an array with a call to:: - - void flex_array_free_parts(struct flex_array *array); - -This call frees all elements, but leaves the array itself in place. -Freeing the entire array is done with:: - - void flex_array_free(struct flex_array *array); - -As of this writing, there are no users of flexible arrays in the mainline -kernel. The functions described here are also not exported to modules; -that will probably be fixed when somebody comes up with a need for it. diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h deleted file mode 100644 index b94fa61b51fb..000000000000 --- a/include/linux/flex_array.h +++ /dev/null @@ -1,149 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _FLEX_ARRAY_H -#define _FLEX_ARRAY_H - -#include -#include -#include - -#define FLEX_ARRAY_PART_SIZE PAGE_SIZE -#define FLEX_ARRAY_BASE_SIZE PAGE_SIZE - -struct flex_array_part; - -/* - * This is meant to replace cases where an array-like - * structure has gotten too big to fit into kmalloc() - * and the developer is getting tempted to use - * vmalloc(). 
- */ - -struct flex_array { - union { - struct { - int element_size; - int total_nr_elements; - int elems_per_part; - struct reciprocal_value reciprocal_elems; - struct flex_array_part *parts[]; - }; - /* - * This little trick makes sure that - * sizeof(flex_array) == PAGE_SIZE - */ - char padding[FLEX_ARRAY_BASE_SIZE]; - }; -}; - -/* Number of bytes left in base struct flex_array, excluding metadata */ -#define FLEX_ARRAY_BASE_BYTES_LEFT \ - (FLEX_ARRAY_BASE_SIZE - offsetof(struct flex_array, parts)) - -/* Number of pointers in base to struct flex_array_part pages */ -#define FLEX_ARRAY_NR_BASE_PTRS \ - (FLEX_ARRAY_BASE_BYTES_LEFT / sizeof(struct flex_array_part *)) - -/* Number of elements of size that fit in struct flex_array_part */ -#define FLEX_ARRAY_ELEMENTS_PER_PART(size) \ - (FLEX_ARRAY_PART_SIZE / size) - -/* - * Defines a statically allocated flex array and ensures its parameters are - * valid. - */ -#define DEFINE_FLEX_ARRAY(__arrayname, __element_size, __total) \ - struct flex_array __arrayname = { { { \ - .element_size = (__element_size), \ - .total_nr_elements = (__total), \ - } } }; \ - static inline void __arrayname##_invalid_parameter(void) \ - { \ - BUILD_BUG_ON((__total) > FLEX_ARRAY_NR_BASE_PTRS * \ - FLEX_ARRAY_ELEMENTS_PER_PART(__element_size)); \ - } - -/** - * flex_array_alloc() - Creates a flexible array. - * @element_size: individual object size. - * @total: maximum number of objects which can be stored. - * @flags: GFP flags - * - * Return: Returns an object of structure flex_array. - */ -struct flex_array *flex_array_alloc(int element_size, unsigned int total, - gfp_t flags); - -/** - * flex_array_prealloc() - Ensures that memory for the elements indexed in the - * range defined by start and nr_elements has been allocated. - * @fa: array to allocate memory to. - * @start: start address - * @nr_elements: number of elements to be allocated. - * @flags: GFP flags - * - */ -int flex_array_prealloc(struct flex_array *fa, unsigned int start, - unsigned int nr_elements, gfp_t flags); - -/** - * flex_array_free() - Removes all elements of a flexible array. - * @fa: array to be freed. - */ -void flex_array_free(struct flex_array *fa); - -/** - * flex_array_free_parts() - Removes all elements of a flexible array, but - * leaves the array itself in place. - * @fa: array to be emptied. - */ -void flex_array_free_parts(struct flex_array *fa); - -/** - * flex_array_put() - Stores data into a flexible array. - * @fa: array where element is to be stored. - * @element_nr: position to copy, must be less than the maximum specified when - * the array was created. - * @src: data source to be copied into the array. - * @flags: GFP flags - * - * Return: Returns zero on success, a negative error code otherwise. - */ -int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src, - gfp_t flags); - -/** - * flex_array_clear() - Clears an individual element in the array, sets the - * given element to FLEX_ARRAY_FREE. - * @element_nr: element position to clear. - * @fa: array to which element to be cleared belongs. - * - * Return: Returns zero on success, -EINVAL otherwise. - */ -int flex_array_clear(struct flex_array *fa, unsigned int element_nr); - -/** - * flex_array_get() - Retrieves data into a flexible array. - * - * @element_nr: Element position to retrieve data from. - * @fa: array from which data is to be retrieved. - * - * Return: Returns a pointer to the data element, or NULL if that - * particular element has never been allocated. 
- */ -void *flex_array_get(struct flex_array *fa, unsigned int element_nr); - -/** - * flex_array_shrink() - Reduces the allocated size of an array. - * @fa: array to shrink. - * - * Return: Returns number of pages of memory actually freed. - * - */ -int flex_array_shrink(struct flex_array *fa); - -#define flex_array_put_ptr(fa, nr, src, gfp) \ - flex_array_put(fa, nr, (void *)&(src), gfp) - -void *flex_array_get_ptr(struct flex_array *fa, unsigned int element_nr); - -#endif /* _FLEX_ARRAY_H */ diff --git a/include/linux/poison.h b/include/linux/poison.h index 5046bad0c1c5..d6d980a681c7 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -83,9 +83,6 @@ #define MUTEX_DEBUG_FREE 0x22 #define MUTEX_POISON_WW_CTX ((void *) 0x500 + POISON_POINTER_DELTA) -/********** lib/flex_array.c **********/ -#define FLEX_ARRAY_FREE 0x6c /* for use-after-free poisoning */ - /********** security/ **********/ #define KEY_DESTROY 0xbd diff --git a/lib/Makefile b/lib/Makefile index b798b41d01ae..4e066120a0d6 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -35,7 +35,7 @@ obj-y += lockref.o obj-y += bcd.o div64.o sort.o parser.o debug_locks.o random32.o \ bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \ - gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \ + gcd.o lcm.o list_sort.o uuid.o iov_iter.o clz_ctz.o \ bsearch.o find_bit.o llist.o memweight.o kfifo.o \ percpu-refcount.o rhashtable.o reciprocal_div.o \ once.o refcount.o usercopy.o errseq.o bucket_locks.o \ diff --git a/lib/flex_array.c b/lib/flex_array.c deleted file mode 100644 index 2eed22fa507c..000000000000 --- a/lib/flex_array.c +++ /dev/null @@ -1,398 +0,0 @@ -/* - * Flexible array managed in PAGE_SIZE parts - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright IBM Corporation, 2009 - * - * Author: Dave Hansen - */ - -#include -#include -#include -#include -#include - -struct flex_array_part { - char elements[FLEX_ARRAY_PART_SIZE]; -}; - -/* - * If a user requests an allocation which is small - * enough, we may simply use the space in the - * flex_array->parts[] array to store the user - * data. - */ -static inline int elements_fit_in_base(struct flex_array *fa) -{ - int data_size = fa->element_size * fa->total_nr_elements; - if (data_size <= FLEX_ARRAY_BASE_BYTES_LEFT) - return 1; - return 0; -} - -/** - * flex_array_alloc - allocate a new flexible array - * @element_size: the size of individual elements in the array - * @total: total number of elements that this should hold - * @flags: page allocation flags to use for base array - * - * Note: all locking must be provided by the caller. - * - * @total is used to size internal structures. If the user ever - * accesses any array indexes >=@total, it will produce errors. 
- * - * The maximum number of elements is defined as: the number of - * elements that can be stored in a page times the number of - * page pointers that we can fit in the base structure or (using - * integer math): - * - * (PAGE_SIZE/element_size) * (PAGE_SIZE-8)/sizeof(void *) - * - * Here's a table showing example capacities. Note that the maximum - * index that the get/put() functions is just nr_objects-1. This - * basically means that you get 4MB of storage on 32-bit and 2MB on - * 64-bit. - * - * - * Element size | Objects | Objects | - * PAGE_SIZE=4k | 32-bit | 64-bit | - * ---------------------------------| - * 1 bytes | 4177920 | 2088960 | - * 2 bytes | 2088960 | 1044480 | - * 3 bytes | 1392300 | 696150 | - * 4 bytes | 1044480 | 522240 | - * 32 bytes | 130560 | 65408 | - * 33 bytes | 126480 | 63240 | - * 2048 bytes | 2040 | 1020 | - * 2049 bytes | 1020 | 510 | - * void * | 1044480 | 261120 | - * - * Since 64-bit pointers are twice the size, we lose half the - * capacity in the base structure. Also note that no effort is made - * to efficiently pack objects across page boundaries. - */ -struct flex_array *flex_array_alloc(int element_size, unsigned int total, - gfp_t flags) -{ - struct flex_array *ret; - int elems_per_part = 0; - int max_size = 0; - struct reciprocal_value reciprocal_elems = { 0 }; - - if (element_size) { - elems_per_part = FLEX_ARRAY_ELEMENTS_PER_PART(element_size); - reciprocal_elems = reciprocal_value(elems_per_part); - max_size = FLEX_ARRAY_NR_BASE_PTRS * elems_per_part; - } - - /* max_size will end up 0 if element_size > PAGE_SIZE */ - if (total > max_size) - return NULL; - ret = kzalloc(sizeof(struct flex_array), flags); - if (!ret) - return NULL; - ret->element_size = element_size; - ret->total_nr_elements = total; - ret->elems_per_part = elems_per_part; - ret->reciprocal_elems = reciprocal_elems; - if (elements_fit_in_base(ret) && !(flags & __GFP_ZERO)) - memset(&ret->parts[0], FLEX_ARRAY_FREE, - FLEX_ARRAY_BASE_BYTES_LEFT); - return ret; -} -EXPORT_SYMBOL(flex_array_alloc); - -static int fa_element_to_part_nr(struct flex_array *fa, - unsigned int element_nr) -{ - /* - * if element_size == 0 we don't get here, so we never touch - * the zeroed fa->reciprocal_elems, which would yield invalid - * results - */ - return reciprocal_divide(element_nr, fa->reciprocal_elems); -} - -/** - * flex_array_free_parts - just free the second-level pages - * @fa: the flex array from which to free parts - * - * This is to be used in cases where the base 'struct flex_array' - * has been statically allocated and should not be free. 
- */ -void flex_array_free_parts(struct flex_array *fa) -{ - int part_nr; - - if (elements_fit_in_base(fa)) - return; - for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++) - kfree(fa->parts[part_nr]); -} -EXPORT_SYMBOL(flex_array_free_parts); - -void flex_array_free(struct flex_array *fa) -{ - flex_array_free_parts(fa); - kfree(fa); -} -EXPORT_SYMBOL(flex_array_free); - -static unsigned int index_inside_part(struct flex_array *fa, - unsigned int element_nr, - unsigned int part_nr) -{ - unsigned int part_offset; - - part_offset = element_nr - part_nr * fa->elems_per_part; - return part_offset * fa->element_size; -} - -static struct flex_array_part * -__fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags) -{ - struct flex_array_part *part = fa->parts[part_nr]; - if (!part) { - part = kmalloc(sizeof(struct flex_array_part), flags); - if (!part) - return NULL; - if (!(flags & __GFP_ZERO)) - memset(part, FLEX_ARRAY_FREE, - sizeof(struct flex_array_part)); - fa->parts[part_nr] = part; - } - return part; -} - -/** - * flex_array_put - copy data into the array at @element_nr - * @fa: the flex array to copy data into - * @element_nr: index of the position in which to insert - * the new element. - * @src: address of data to copy into the array - * @flags: page allocation flags to use for array expansion - * - * - * Note that this *copies* the contents of @src into - * the array. If you are trying to store an array of - * pointers, make sure to pass in &ptr instead of ptr. - * You may instead wish to use the flex_array_put_ptr() - * helper function. - * - * Locking must be provided by the caller. - */ -int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src, - gfp_t flags) -{ - int part_nr = 0; - struct flex_array_part *part; - void *dst; - - if (element_nr >= fa->total_nr_elements) - return -ENOSPC; - if (!fa->element_size) - return 0; - if (elements_fit_in_base(fa)) - part = (struct flex_array_part *)&fa->parts[0]; - else { - part_nr = fa_element_to_part_nr(fa, element_nr); - part = __fa_get_part(fa, part_nr, flags); - if (!part) - return -ENOMEM; - } - dst = &part->elements[index_inside_part(fa, element_nr, part_nr)]; - memcpy(dst, src, fa->element_size); - return 0; -} -EXPORT_SYMBOL(flex_array_put); - -/** - * flex_array_clear - clear element in array at @element_nr - * @fa: the flex array of the element. - * @element_nr: index of the position to clear. - * - * Locking must be provided by the caller. - */ -int flex_array_clear(struct flex_array *fa, unsigned int element_nr) -{ - int part_nr = 0; - struct flex_array_part *part; - void *dst; - - if (element_nr >= fa->total_nr_elements) - return -ENOSPC; - if (!fa->element_size) - return 0; - if (elements_fit_in_base(fa)) - part = (struct flex_array_part *)&fa->parts[0]; - else { - part_nr = fa_element_to_part_nr(fa, element_nr); - part = fa->parts[part_nr]; - if (!part) - return -EINVAL; - } - dst = &part->elements[index_inside_part(fa, element_nr, part_nr)]; - memset(dst, FLEX_ARRAY_FREE, fa->element_size); - return 0; -} -EXPORT_SYMBOL(flex_array_clear); - -/** - * flex_array_prealloc - guarantee that array space exists - * @fa: the flex array for which to preallocate parts - * @start: index of first array element for which space is allocated - * @nr_elements: number of elements for which space is allocated - * @flags: page allocation flags - * - * This will guarantee that no future calls to flex_array_put() - * will allocate memory. 
It can be used if you are expecting to - * be holding a lock or in some atomic context while writing - * data into the array. - * - * Locking must be provided by the caller. - */ -int flex_array_prealloc(struct flex_array *fa, unsigned int start, - unsigned int nr_elements, gfp_t flags) -{ - int start_part; - int end_part; - int part_nr; - unsigned int end; - struct flex_array_part *part; - - if (!start && !nr_elements) - return 0; - if (start >= fa->total_nr_elements) - return -ENOSPC; - if (!nr_elements) - return 0; - - end = start + nr_elements - 1; - - if (end >= fa->total_nr_elements) - return -ENOSPC; - if (!fa->element_size) - return 0; - if (elements_fit_in_base(fa)) - return 0; - start_part = fa_element_to_part_nr(fa, start); - end_part = fa_element_to_part_nr(fa, end); - for (part_nr = start_part; part_nr <= end_part; part_nr++) { - part = __fa_get_part(fa, part_nr, flags); - if (!part) - return -ENOMEM; - } - return 0; -} -EXPORT_SYMBOL(flex_array_prealloc); - -/** - * flex_array_get - pull data back out of the array - * @fa: the flex array from which to extract data - * @element_nr: index of the element to fetch from the array - * - * Returns a pointer to the data at index @element_nr. Note - * that this is a copy of the data that was passed in. If you - * are using this to store pointers, you'll get back &ptr. You - * may instead wish to use the flex_array_get_ptr helper. - * - * Locking must be provided by the caller. - */ -void *flex_array_get(struct flex_array *fa, unsigned int element_nr) -{ - int part_nr = 0; - struct flex_array_part *part; - - if (!fa->element_size) - return NULL; - if (element_nr >= fa->total_nr_elements) - return NULL; - if (elements_fit_in_base(fa)) - part = (struct flex_array_part *)&fa->parts[0]; - else { - part_nr = fa_element_to_part_nr(fa, element_nr); - part = fa->parts[part_nr]; - if (!part) - return NULL; - } - return &part->elements[index_inside_part(fa, element_nr, part_nr)]; -} -EXPORT_SYMBOL(flex_array_get); - -/** - * flex_array_get_ptr - pull a ptr back out of the array - * @fa: the flex array from which to extract data - * @element_nr: index of the element to fetch from the array - * - * Returns the pointer placed in the flex array at element_nr using - * flex_array_put_ptr(). This function should not be called if the - * element in question was not set using the _put_ptr() helper. - */ -void *flex_array_get_ptr(struct flex_array *fa, unsigned int element_nr) -{ - void **tmp; - - tmp = flex_array_get(fa, element_nr); - if (!tmp) - return NULL; - - return *tmp; -} -EXPORT_SYMBOL(flex_array_get_ptr); - -static int part_is_free(struct flex_array_part *part) -{ - int i; - - for (i = 0; i < sizeof(struct flex_array_part); i++) - if (part->elements[i] != FLEX_ARRAY_FREE) - return 0; - return 1; -} - -/** - * flex_array_shrink - free unused second-level pages - * @fa: the flex array to shrink - * - * Frees all second-level pages that consist solely of unused - * elements. Returns the number of pages freed. - * - * Locking must be provided by the caller. 
- */ -int flex_array_shrink(struct flex_array *fa) -{ - struct flex_array_part *part; - int part_nr; - int ret = 0; - - if (!fa->total_nr_elements || !fa->element_size) - return 0; - if (elements_fit_in_base(fa)) - return ret; - for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++) { - part = fa->parts[part_nr]; - if (!part) - continue; - if (part_is_free(part)) { - fa->parts[part_nr] = NULL; - kfree(part); - ret++; - } - } - return ret; -} -EXPORT_SYMBOL(flex_array_shrink); diff --git a/tools/include/linux/poison.h b/tools/include/linux/poison.h index 9fdcd3eaac3b..d29725769107 100644 --- a/tools/include/linux/poison.h +++ b/tools/include/linux/poison.h @@ -87,9 +87,6 @@ #define MUTEX_DEBUG_INIT 0x11 #define MUTEX_DEBUG_FREE 0x22 -/********** lib/flex_array.c **********/ -#define FLEX_ARRAY_FREE 0x6c /* for use-after-free poisoning */ - /********** security/ **********/ #define KEY_DESTROY 0xbd -- cgit v1.2.3-59-g8ed1b
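For reference, a minimal usage sketch of the genradix API introduced by the "generic radix trees" patch above (illustration only; the msg/msg_log identifiers are made up and do not appear in any patch in this series):

	/*
	 * Hypothetical example: a sparse log of fixed-size records indexed by
	 * position.  Slots that were never written read back as zeroes, since
	 * the genradix allocates whole zeroed pages of entries at a time.
	 */
	#include <linux/errno.h>
	#include <linux/generic-radix-tree.h>
	#include <linux/gfp.h>
	#include <linux/kernel.h>
	#include <linux/string.h>
	#include <linux/types.h>

	struct msg {
		u64 seq;
		char text[64];
	};

	/* Statically defined genradix; zero-initialized, i.e. empty. */
	static GENRADIX(struct msg) msg_log;

	static int msg_log_record(size_t idx, u64 seq, const char *text)
	{
		struct msg *m;

		/* Allocates the backing page for this slot if necessary. */
		m = genradix_ptr_alloc(&msg_log, idx, GFP_KERNEL);
		if (!m)
			return -ENOMEM;

		m->seq = seq;
		strscpy(m->text, text, sizeof(m->text));
		return 0;
	}

	static void msg_log_dump(void)
	{
		struct genradix_iter iter;
		struct msg *m;

		/* Visits every allocated slot in index order. */
		genradix_for_each(&msg_log, iter, m)
			pr_info("msg %zu: seq=%llu %s\n", iter.pos,
				(unsigned long long)m->seq, m->text);
	}

	static void msg_log_exit(void)
	{
		genradix_free(&msg_log);
	}

Because entries may not cross a page boundary, sizeof(struct msg) must stay at or below PAGE_SIZE; with a 72-byte entry on 4k pages, each leaf page holds 56 entries and the remainder of the page is unused.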