From ed2f240094f900833ac06f533ab8bbcf0a1e8199 Mon Sep 17 00:00:00 2001 From: Zhang Zhen Date: Thu, 9 Oct 2014 15:26:31 -0700 Subject: memory-hotplug: add sysfs valid_zones attribute Currently memory-hotplug has two limits: 1. If the memory block is in ZONE_NORMAL, you can change it to ZONE_MOVABLE, but this memory block must be adjacent to ZONE_MOVABLE. 2. If the memory block is in ZONE_MOVABLE, you can change it to ZONE_NORMAL, but this memory block must be adjacent to ZONE_NORMAL. With this patch, we can easy to know a memory block can be onlined to which zone, and don't need to know the above two limits. Updated the related Documentation. [akpm@linux-foundation.org: use conventional comment layout] [akpm@linux-foundation.org: fix build with CONFIG_MEMORY_HOTREMOVE=n] [akpm@linux-foundation.org: remove unused local zone_prev] Signed-off-by: Zhang Zhen Cc: Dave Hansen Cc: David Rientjes Cc: Toshi Kani Cc: Yasuaki Ishimatsu Cc: Naoya Horiguchi Cc: Wang Nan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/memory.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'drivers/base') diff --git a/drivers/base/memory.c b/drivers/base/memory.c index a2e13e250bba..7c5d87191b28 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -373,6 +373,45 @@ static ssize_t show_phys_device(struct device *dev, return sprintf(buf, "%d\n", mem->phys_device); } +#ifdef CONFIG_MEMORY_HOTREMOVE +static ssize_t show_valid_zones(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct memory_block *mem = to_memory_block(dev); + unsigned long start_pfn, end_pfn; + unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; + struct page *first_page; + struct zone *zone; + + start_pfn = section_nr_to_pfn(mem->start_section_nr); + end_pfn = start_pfn + nr_pages; + first_page = pfn_to_page(start_pfn); + + /* The block contains more than one zone can not be offlined. */ + if (!test_pages_in_a_zone(start_pfn, end_pfn)) + return sprintf(buf, "none\n"); + + zone = page_zone(first_page); + + if (zone_idx(zone) == ZONE_MOVABLE - 1) { + /*The mem block is the last memoryblock of this zone.*/ + if (end_pfn == zone_end_pfn(zone)) + return sprintf(buf, "%s %s\n", + zone->name, (zone + 1)->name); + } + + if (zone_idx(zone) == ZONE_MOVABLE) { + /*The mem block is the first memoryblock of ZONE_MOVABLE.*/ + if (start_pfn == zone->zone_start_pfn) + return sprintf(buf, "%s %s\n", + zone->name, (zone - 1)->name); + } + + return sprintf(buf, "%s\n", zone->name); +} +static DEVICE_ATTR(valid_zones, 0444, show_valid_zones, NULL); +#endif + static DEVICE_ATTR(phys_index, 0444, show_mem_start_phys_index, NULL); static DEVICE_ATTR(state, 0644, show_mem_state, store_mem_state); static DEVICE_ATTR(phys_device, 0444, show_phys_device, NULL); @@ -523,6 +562,9 @@ static struct attribute *memory_memblk_attrs[] = { &dev_attr_state.attr, &dev_attr_phys_device.attr, &dev_attr_removable.attr, +#ifdef CONFIG_MEMORY_HOTREMOVE + &dev_attr_valid_zones.attr, +#endif NULL }; -- cgit v1.2.3-59-g8ed1b From 513510ddba9650fc7da456eefeb0ead7632324f6 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Thu, 9 Oct 2014 15:26:40 -0700 Subject: common: dma-mapping: introduce common remapping functions For architectures without coherent DMA, memory for DMA may need to be remapped with coherent attributes. Factor out the the remapping code from arm and put it in a common location to reduce code duplication. As part of this, the arm APIs are now migrated away from ioremap_page_range to the common APIs which use map_vm_area for remapping. This should be an equivalent change and using map_vm_area is more correct as ioremap_page_range is intended to bring in io addresses into the cpu space and not regular kernel managed memory. Signed-off-by: Laura Abbott Reviewed-by: Catalin Marinas Cc: Arnd Bergmann Cc: David Riley Cc: Olof Johansson Cc: Ritesh Harjain Cc: Russell King Cc: Thierry Reding Cc: Will Deacon Cc: James Hogan Cc: Laura Abbott Cc: Mitchel Humpherys Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/dma-mapping.c | 57 ++++--------------------- drivers/base/dma-mapping.c | 72 ++++++++++++++++++++++++++++++++ include/asm-generic/dma-mapping-common.h | 9 ++++ 3 files changed, 90 insertions(+), 48 deletions(-) (limited to 'drivers/base') diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 7a996aaa061e..eecc8e60deea 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -298,37 +298,19 @@ static void * __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, const void *caller) { - struct vm_struct *area; - unsigned long addr; - /* * DMA allocation can be mapped to user space, so lets * set VM_USERMAP flags too. */ - area = get_vm_area_caller(size, VM_ARM_DMA_CONSISTENT | VM_USERMAP, - caller); - if (!area) - return NULL; - addr = (unsigned long)area->addr; - area->phys_addr = __pfn_to_phys(page_to_pfn(page)); - - if (ioremap_page_range(addr, addr + size, area->phys_addr, prot)) { - vunmap((void *)addr); - return NULL; - } - return (void *)addr; + return dma_common_contiguous_remap(page, size, + VM_ARM_DMA_CONSISTENT | VM_USERMAP, + prot, caller); } static void __dma_free_remap(void *cpu_addr, size_t size) { - unsigned int flags = VM_ARM_DMA_CONSISTENT | VM_USERMAP; - struct vm_struct *area = find_vm_area(cpu_addr); - if (!area || (area->flags & flags) != flags) { - WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); - return; - } - unmap_kernel_range((unsigned long)cpu_addr, size); - vunmap(cpu_addr); + dma_common_free_remap(cpu_addr, size, + VM_ARM_DMA_CONSISTENT | VM_USERMAP); } #define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K @@ -1271,29 +1253,8 @@ static void * __iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot, const void *caller) { - unsigned int i, nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; - struct vm_struct *area; - unsigned long p; - - area = get_vm_area_caller(size, VM_ARM_DMA_CONSISTENT | VM_USERMAP, - caller); - if (!area) - return NULL; - - area->pages = pages; - area->nr_pages = nr_pages; - p = (unsigned long)area->addr; - - for (i = 0; i < nr_pages; i++) { - phys_addr_t phys = __pfn_to_phys(page_to_pfn(pages[i])); - if (ioremap_page_range(p, p + PAGE_SIZE, phys, prot)) - goto err; - p += PAGE_SIZE; - } - return area->addr; -err: - unmap_kernel_range((unsigned long)area->addr, size); - vunmap(area->addr); + return dma_common_pages_remap(pages, size, + VM_ARM_DMA_CONSISTENT | VM_USERMAP, prot, caller); return NULL; } @@ -1501,8 +1462,8 @@ void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, } if (!dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) { - unmap_kernel_range((unsigned long)cpu_addr, size); - vunmap(cpu_addr); + dma_common_free_remap(cpu_addr, size, + VM_ARM_DMA_CONSISTENT | VM_USERMAP); } __iommu_remove_mapping(dev, handle, size); diff --git a/drivers/base/dma-mapping.c b/drivers/base/dma-mapping.c index 6cd08e145bfa..9e8bbdd470ca 100644 --- a/drivers/base/dma-mapping.c +++ b/drivers/base/dma-mapping.c @@ -10,6 +10,8 @@ #include #include #include +#include +#include #include /* @@ -267,3 +269,73 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, return ret; } EXPORT_SYMBOL(dma_common_mmap); + +#ifdef CONFIG_MMU +/* + * remaps an array of PAGE_SIZE pages into another vm_area + * Cannot be used in non-sleeping contexts + */ +void *dma_common_pages_remap(struct page **pages, size_t size, + unsigned long vm_flags, pgprot_t prot, + const void *caller) +{ + struct vm_struct *area; + + area = get_vm_area_caller(size, vm_flags, caller); + if (!area) + return NULL; + + area->pages = pages; + + if (map_vm_area(area, prot, pages)) { + vunmap(area->addr); + return NULL; + } + + return area->addr; +} + +/* + * remaps an allocated contiguous region into another vm_area. + * Cannot be used in non-sleeping contexts + */ + +void *dma_common_contiguous_remap(struct page *page, size_t size, + unsigned long vm_flags, + pgprot_t prot, const void *caller) +{ + int i; + struct page **pages; + void *ptr; + unsigned long pfn; + + pages = kmalloc(sizeof(struct page *) << get_order(size), GFP_KERNEL); + if (!pages) + return NULL; + + for (i = 0, pfn = page_to_pfn(page); i < (size >> PAGE_SHIFT); i++) + pages[i] = pfn_to_page(pfn + i); + + ptr = dma_common_pages_remap(pages, size, vm_flags, prot, caller); + + kfree(pages); + + return ptr; +} + +/* + * unmaps a range previously mapped by dma_common_*_remap + */ +void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags) +{ + struct vm_struct *area = find_vm_area(cpu_addr); + + if (!area || (area->flags & vm_flags) != vm_flags) { + WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); + return; + } + + unmap_kernel_range((unsigned long)cpu_addr, size); + vunmap(cpu_addr); +} +#endif diff --git a/include/asm-generic/dma-mapping-common.h b/include/asm-generic/dma-mapping-common.h index de8bf89940f8..a9fd248f5d48 100644 --- a/include/asm-generic/dma-mapping-common.h +++ b/include/asm-generic/dma-mapping-common.h @@ -179,6 +179,15 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, extern int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size); +void *dma_common_contiguous_remap(struct page *page, size_t size, + unsigned long vm_flags, + pgprot_t prot, const void *caller); + +void *dma_common_pages_remap(struct page **pages, size_t size, + unsigned long vm_flags, pgprot_t prot, + const void *caller); +void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags); + /** * dma_mmap_attrs - map a coherent DMA allocation into user space * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices -- cgit v1.2.3-59-g8ed1b From 1f13ae399c58af5a05b5cee61da864e1f4071de4 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 9 Oct 2014 15:27:39 -0700 Subject: mm: remove noisy remainder of the scan_unevictable interface The deprecation warnings for the scan_unevictable interface triggers by scripts doing `sysctl -a | grep something else'. This is annoying and not helpful. The interface has been defunct since 264e56d8247e ("mm: disable user interface to manually rescue unevictable pages"), which was in 2011, and there haven't been any reports of usecases for it, only reports that the deprecation warnings are annying. It's unlikely that anybody is using this interface specifically at this point, so remove it. Signed-off-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/ABI/stable/sysfs-devices-node | 8 ---- drivers/base/node.c | 3 -- include/linux/swap.h | 16 -------- kernel/sysctl.c | 7 ---- mm/vmscan.c | 63 ----------------------------- 5 files changed, 97 deletions(-) (limited to 'drivers/base') diff --git a/Documentation/ABI/stable/sysfs-devices-node b/Documentation/ABI/stable/sysfs-devices-node index ce259c13c36a..5b2d0f08867c 100644 --- a/Documentation/ABI/stable/sysfs-devices-node +++ b/Documentation/ABI/stable/sysfs-devices-node @@ -85,14 +85,6 @@ Description: will be compacted. When it completes, memory will be freed into blocks which have as many contiguous pages as possible -What: /sys/devices/system/node/nodeX/scan_unevictable_pages -Date: October 2008 -Contact: Lee Schermerhorn -Description: - When set, it triggers scanning the node's unevictable lists - and move any pages that have become evictable onto the respective - zone's inactive list. See mm/vmscan.c - What: /sys/devices/system/node/nodeX/hugepages/hugepages-/ Date: December 2009 Contact: Lee Schermerhorn diff --git a/drivers/base/node.c b/drivers/base/node.c index d51c49c9bafa..472168cd0c97 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -289,8 +289,6 @@ static int register_node(struct node *node, int num, struct node *parent) device_create_file(&node->dev, &dev_attr_distance); device_create_file(&node->dev, &dev_attr_vmstat); - scan_unevictable_register_node(node); - hugetlb_register_node(node); compaction_register_node(node); @@ -314,7 +312,6 @@ void unregister_node(struct node *node) device_remove_file(&node->dev, &dev_attr_distance); device_remove_file(&node->dev, &dev_attr_vmstat); - scan_unevictable_unregister_node(node); hugetlb_unregister_node(node); /* no-op, if memoryless node */ device_unregister(&node->dev); diff --git a/include/linux/swap.h b/include/linux/swap.h index 1b72060f093a..ea4f926e6b9b 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -354,22 +354,6 @@ static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order) extern int page_evictable(struct page *page); extern void check_move_unevictable_pages(struct page **, int nr_pages); -extern unsigned long scan_unevictable_pages; -extern int scan_unevictable_handler(struct ctl_table *, int, - void __user *, size_t *, loff_t *); -#ifdef CONFIG_NUMA -extern int scan_unevictable_register_node(struct node *node); -extern void scan_unevictable_unregister_node(struct node *node); -#else -static inline int scan_unevictable_register_node(struct node *node) -{ - return 0; -} -static inline void scan_unevictable_unregister_node(struct node *node) -{ -} -#endif - extern int kswapd_run(int nid); extern void kswapd_stop(int nid); #ifdef CONFIG_MEMCG diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 75875a741b5e..91180987e40e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1460,13 +1460,6 @@ static struct ctl_table vm_table[] = { .extra2 = &one, }, #endif - { - .procname = "scan_unevictable_pages", - .data = &scan_unevictable_pages, - .maxlen = sizeof(scan_unevictable_pages), - .mode = 0644, - .proc_handler = scan_unevictable_handler, - }, #ifdef CONFIG_MEMORY_FAILURE { .procname = "memory_failure_early_kill", diff --git a/mm/vmscan.c b/mm/vmscan.c index 1a71b8b1ea34..af72fe8e8d74 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -3797,66 +3797,3 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages) } } #endif /* CONFIG_SHMEM */ - -static void warn_scan_unevictable_pages(void) -{ - printk_once(KERN_WARNING - "%s: The scan_unevictable_pages sysctl/node-interface has been " - "disabled for lack of a legitimate use case. If you have " - "one, please send an email to linux-mm@kvack.org.\n", - current->comm); -} - -/* - * scan_unevictable_pages [vm] sysctl handler. On demand re-scan of - * all nodes' unevictable lists for evictable pages - */ -unsigned long scan_unevictable_pages; - -int scan_unevictable_handler(struct ctl_table *table, int write, - void __user *buffer, - size_t *length, loff_t *ppos) -{ - warn_scan_unevictable_pages(); - proc_doulongvec_minmax(table, write, buffer, length, ppos); - scan_unevictable_pages = 0; - return 0; -} - -#ifdef CONFIG_NUMA -/* - * per node 'scan_unevictable_pages' attribute. On demand re-scan of - * a specified node's per zone unevictable lists for evictable pages. - */ - -static ssize_t read_scan_unevictable_node(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - warn_scan_unevictable_pages(); - return sprintf(buf, "0\n"); /* always zero; should fit... */ -} - -static ssize_t write_scan_unevictable_node(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - warn_scan_unevictable_pages(); - return 1; -} - - -static DEVICE_ATTR(scan_unevictable_pages, S_IRUGO | S_IWUSR, - read_scan_unevictable_node, - write_scan_unevictable_node); - -int scan_unevictable_register_node(struct node *node) -{ - return device_create_file(&node->dev, &dev_attr_scan_unevictable_pages); -} - -void scan_unevictable_unregister_node(struct node *node) -{ - device_remove_file(&node->dev, &dev_attr_scan_unevictable_pages); -} -#endif -- cgit v1.2.3-59-g8ed1b From f0d6d1f6ff6f8525cfa396ec1969b8f402391445 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 9 Oct 2014 15:29:41 -0700 Subject: CMA: document cma=0 It isn't obvious that CMA can be disabled on the kernel's command line, so document it. Signed-off-by: Jean Delvare Cc: Joonsoo Kim Cc: Greg Kroah-Hartman Cc: Akinobu Mita Cc: Chuck Ebbert Cc: Marek Szyprowski Cc: Konrad Rzeszutek Wilk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 3 ++- drivers/base/Kconfig | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/base') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index a126a31dde02..809e880bc787 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -656,7 +656,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Sets the size of kernel global memory area for contiguous memory allocations and optionally the placement constraint by the physical address range of - memory allocations. For more information, see + memory allocations. A value of 0 disables CMA + altogether. For more information, see include/linux/dma-contiguous.h cmo_free_hint= [PPC] Format: { yes | no } diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index 134f763d90fd..61a33f4ba608 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -252,6 +252,9 @@ config DMA_CMA to allocate big physically-contiguous blocks of memory for use with hardware components that do not support I/O map nor scatter-gather. + You can disable CMA by specifying "cma=0" on the kernel's command + line. + For more information see . If unsure, say "n". -- cgit v1.2.3-59-g8ed1b