From 7771bdbbfd3d6f204631b6fd9e1bbc30cd15918e Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Tue, 5 Mar 2019 15:41:20 -0800 Subject: kasan: remove use after scope bugs detection. Use after scope bugs detector seems to be almost entirely useless for the linux kernel. It exists over two years, but I've seen only one valid bug so far [1]. And the bug was fixed before it has been reported. There were some other use-after-scope reports, but they were false-positives due to different reasons like incompatibility with structleak plugin. This feature significantly increases stack usage, especially with GCC < 9 version, and causes a 32K stack overflow. It probably adds performance penalty too. Given all that, let's remove use-after-scope detector entirely. While preparing this patch I've noticed that we mistakenly enable use-after-scope detection for clang compiler regardless of CONFIG_KASAN_EXTRA setting. This is also fixed now. [1] http://lkml.kernel.org/r/<20171129052106.rhgbjhhis53hkgfn@wfg-t540p.sh.intel.com> Link: http://lkml.kernel.org/r/20190111185842.13978-1-aryabinin@virtuozzo.com Signed-off-by: Andrey Ryabinin Acked-by: Will Deacon [arm64] Cc: Qian Cai Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Catalin Marinas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 1 - lib/Kconfig.kasan | 10 ---------- lib/test_kasan.c | 24 ------------------------ 3 files changed, 35 deletions(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index d4df5b24d75e..a219f3488ad7 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -222,7 +222,6 @@ config ENABLE_MUST_CHECK config FRAME_WARN int "Warn for stack frames larger than (needs gcc 4.4)" range 0 8192 - default 3072 if KASAN_EXTRA default 2048 if GCC_PLUGIN_LATENT_ENTROPY default 1280 if (!64BIT && PARISC) default 1024 if (!64BIT && !PARISC) diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 9737059ec58b..9950b660e62d 100644 --- a/lib/Kconfig.kasan +++ 
b/lib/Kconfig.kasan @@ -78,16 +78,6 @@ config KASAN_SW_TAGS endchoice -config KASAN_EXTRA - bool "KASAN: extra checks" - depends on KASAN_GENERIC && DEBUG_KERNEL && !COMPILE_TEST - help - This enables further checks in generic KASAN, for now it only - includes the address-use-after-scope check that can lead to - excessive kernel stack usage, frame size warnings and longer - compile time. - See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 - choice prompt "Instrumentation type" depends on KASAN diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 51b78405bf24..7de2702621dc 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -480,29 +480,6 @@ static noinline void __init copy_user_test(void) kfree(kmem); } -static noinline void __init use_after_scope_test(void) -{ - volatile char *volatile p; - - pr_info("use-after-scope on int\n"); - { - int local = 0; - - p = (char *)&local; - } - p[0] = 1; - p[3] = 1; - - pr_info("use-after-scope on array\n"); - { - char local[1024] = {0}; - - p = local; - } - p[0] = 1; - p[1023] = 1; -} - static noinline void __init kasan_alloca_oob_left(void) { volatile int i = 10; @@ -682,7 +659,6 @@ static int __init kmalloc_tests_init(void) kasan_alloca_oob_right(); ksize_unpoisons_memory(); copy_user_test(); - use_after_scope_test(); kmem_cache_double_free(); kmem_cache_invalid_free(); kasan_memchr(); -- cgit v1.2.3-59-g8ed1b From 98fa15f34cb379864757670b8e8743b21456a20e Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 5 Mar 2019 15:42:58 -0800 Subject: mm: replace all open encodings for NUMA_NO_NODE Patch series "Replace all open encodings for NUMA_NO_NODE", v3. All these places for replacement were found by running the following grep patterns on the entire kernel code. Please let me know if this might have missed some instances. This might also have replaced some false positives. I will appreciate suggestions, inputs and review. 1. git grep "nid == -1" 2. git grep "node == -1" 3. git grep "nid = -1" 4. 
git grep "node = -1" This patch (of 2): At present there are multiple places where invalid node number is encoded as -1. Even though implicitly understood it is always better to have macros in there. Replace these open encodings for an invalid node number with the global macro NUMA_NO_NODE. This helps remove NUMA related assumptions like 'invalid node' from various places redirecting them to a common definition. Link: http://lkml.kernel.org/r/1545127933-10711-2-git-send-email-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Reviewed-by: David Hildenbrand Acked-by: Jeff Kirsher [ixgbe] Acked-by: Jens Axboe [mtip32xx] Acked-by: Vinod Koul [dmaengine.c] Acked-by: Michael Ellerman [powerpc] Acked-by: Doug Ledford [drivers/infiniband] Cc: Joseph Qi Cc: Hans Verkuil Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/topology.h | 3 ++- arch/ia64/kernel/numa.c | 2 +- arch/ia64/mm/discontig.c | 6 +++--- arch/powerpc/include/asm/pci-bridge.h | 3 ++- arch/powerpc/kernel/paca.c | 3 ++- arch/powerpc/kernel/pci-common.c | 3 ++- arch/powerpc/mm/numa.c | 14 +++++++------- arch/powerpc/platforms/powernv/memtrace.c | 5 +++-- arch/sparc/kernel/pci_fire.c | 3 ++- arch/sparc/kernel/pci_schizo.c | 3 ++- arch/sparc/kernel/psycho_common.c | 3 ++- arch/sparc/kernel/sbus.c | 3 ++- arch/sparc/mm/init_64.c | 6 +++--- arch/x86/include/asm/pci.h | 3 ++- arch/x86/kernel/apic/x2apic_uv_x.c | 7 ++++--- arch/x86/kernel/smpboot.c | 3 ++- drivers/block/mtip32xx/mtip32xx.c | 5 +++-- drivers/dma/dmaengine.c | 4 +++- drivers/infiniband/hw/hfi1/affinity.c | 3 ++- drivers/infiniband/hw/hfi1/init.c | 3 ++- drivers/iommu/dmar.c | 5 +++-- drivers/iommu/intel-iommu.c | 3 ++- drivers/misc/sgi-xp/xpc_uv.c | 3 ++- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 5 +++-- include/linux/device.h | 2 +- init/init_task.c | 3 ++- kernel/kthread.c | 3 ++- kernel/sched/fair.c | 15 ++++++++------- lib/cpumask.c | 3 ++- mm/huge_memory.c | 13 +++++++------ 
mm/hugetlb.c | 3 ++- mm/ksm.c | 2 +- mm/memory.c | 7 ++++--- mm/memory_hotplug.c | 12 ++++++------ mm/mempolicy.c | 2 +- mm/page_alloc.c | 4 ++-- mm/page_ext.c | 2 +- net/core/pktgen.c | 3 ++- net/qrtr/qrtr.c | 3 ++- 39 files changed, 104 insertions(+), 74 deletions(-) (limited to 'lib') diff --git a/arch/alpha/include/asm/topology.h b/arch/alpha/include/asm/topology.h index e6e13a85796a..5a77a40567fa 100644 --- a/arch/alpha/include/asm/topology.h +++ b/arch/alpha/include/asm/topology.h @@ -4,6 +4,7 @@ #include #include +#include #include #ifdef CONFIG_NUMA @@ -29,7 +30,7 @@ static const struct cpumask *cpumask_of_node(int node) { int cpu; - if (node == -1) + if (node == NUMA_NO_NODE) return cpu_all_mask; cpumask_clear(&node_to_cpumask_map[node]); diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c index 92c376279c6d..1315da6c7aeb 100644 --- a/arch/ia64/kernel/numa.c +++ b/arch/ia64/kernel/numa.c @@ -74,7 +74,7 @@ void __init build_cpu_to_node_map(void) cpumask_clear(&node_to_cpu_mask[node]); for_each_possible_early_cpu(cpu) { - node = -1; + node = NUMA_NO_NODE; for (i = 0; i < NR_CPUS; ++i) if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) { node = node_cpuid[i].nid; diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 8a965784340c..f9c36750c6a4 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -227,7 +227,7 @@ void __init setup_per_cpu_areas(void) * CPUs are put into groups according to node. Walk cpu_map * and create new groups at node boundaries. 
*/ - prev_node = -1; + prev_node = NUMA_NO_NODE; ai->nr_groups = 0; for (unit = 0; unit < nr_units; unit++) { cpu = cpu_map[unit]; @@ -435,7 +435,7 @@ static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize) { void *ptr = NULL; u8 best = 0xff; - int bestnode = -1, node, anynode = 0; + int bestnode = NUMA_NO_NODE, node, anynode = 0; for_each_online_node(node) { if (node_isset(node, memory_less_mask)) @@ -447,7 +447,7 @@ static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize) anynode = node; } - if (bestnode == -1) + if (bestnode == NUMA_NO_NODE) bestnode = anynode; ptr = memblock_alloc_try_nid(pernodesize, PERCPU_PAGE_SIZE, diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index aee4fcc24990..77fc21278fa2 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -10,6 +10,7 @@ #include #include #include +#include struct device_node; @@ -265,7 +266,7 @@ extern int pcibios_map_io_space(struct pci_bus *bus); #ifdef CONFIG_NUMA #define PHB_SET_NODE(PHB, NODE) ((PHB)->node = (NODE)) #else -#define PHB_SET_NODE(PHB, NODE) ((PHB)->node = -1) +#define PHB_SET_NODE(PHB, NODE) ((PHB)->node = NUMA_NO_NODE) #endif #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 913bfca09c4f..b8480127793d 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -36,7 +37,7 @@ static void *__init alloc_paca_data(unsigned long size, unsigned long align, * which will put its paca in the right place. 
*/ if (cpu == boot_cpuid) { - nid = -1; + nid = NUMA_NO_NODE; memblock_set_bottom_up(true); } else { nid = early_cpu_to_node(cpu); diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 88e4f69a09e5..4538e8ddde80 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -132,7 +133,7 @@ struct pci_controller *pcibios_alloc_controller(struct device_node *dev) int nid = of_node_to_nid(dev); if (nid < 0 || !node_online(nid)) - nid = -1; + nid = NUMA_NO_NODE; PHB_SET_NODE(phb, nid); } diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 87f0dd004295..270cefb75cca 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -215,7 +215,7 @@ static void initialize_distance_lookup_table(int nid, */ static int associativity_to_nid(const __be32 *associativity) { - int nid = -1; + int nid = NUMA_NO_NODE; if (min_common_depth == -1) goto out; @@ -225,7 +225,7 @@ static int associativity_to_nid(const __be32 *associativity) /* POWER4 LPAR uses 0xffff as invalid node */ if (nid == 0xffff || nid >= MAX_NUMNODES) - nid = -1; + nid = NUMA_NO_NODE; if (nid > 0 && of_read_number(associativity, 1) >= distance_ref_points_depth) { @@ -244,7 +244,7 @@ out: */ static int of_node_to_nid_single(struct device_node *device) { - int nid = -1; + int nid = NUMA_NO_NODE; const __be32 *tmp; tmp = of_get_associativity(device); @@ -256,7 +256,7 @@ static int of_node_to_nid_single(struct device_node *device) /* Walk the device tree upwards, looking for an associativity id */ int of_node_to_nid(struct device_node *device) { - int nid = -1; + int nid = NUMA_NO_NODE; of_node_get(device); while (device) { @@ -454,7 +454,7 @@ static int of_drconf_to_nid_single(struct drmem_lmb *lmb) */ static int numa_setup_cpu(unsigned long lcpu) { - int nid = -1; + int nid = NUMA_NO_NODE; struct device_node *cpu; /* @@ -930,7 +930,7 @@ static int 
hot_add_drconf_scn_to_nid(unsigned long scn_addr) { struct drmem_lmb *lmb; unsigned long lmb_size; - int nid = -1; + int nid = NUMA_NO_NODE; lmb_size = drmem_lmb_size(); @@ -960,7 +960,7 @@ static int hot_add_drconf_scn_to_nid(unsigned long scn_addr) static int hot_add_node_scn_to_nid(unsigned long scn_addr) { struct device_node *memory; - int nid = -1; + int nid = NUMA_NO_NODE; for_each_node_by_type(memory, "memory") { unsigned long start, size; diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index 84d038ed3882..248a38ad25c7 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -223,7 +224,7 @@ static int memtrace_online(void) ent = &memtrace_array[i]; /* We have onlined this chunk previously */ - if (ent->nid == -1) + if (ent->nid == NUMA_NO_NODE) continue; /* Remove from io mappings */ @@ -257,7 +258,7 @@ static int memtrace_online(void) */ debugfs_remove_recursive(ent->dir); pr_info("Added trace memory back to node %d\n", ent->nid); - ent->size = ent->start = ent->nid = -1; + ent->size = ent->start = ent->nid = NUMA_NO_NODE; } if (ret) return ret; diff --git a/arch/sparc/kernel/pci_fire.c b/arch/sparc/kernel/pci_fire.c index be71ae086622..0ca08d455e80 100644 --- a/arch/sparc/kernel/pci_fire.c +++ b/arch/sparc/kernel/pci_fire.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -416,7 +417,7 @@ static int pci_fire_pbm_init(struct pci_pbm_info *pbm, struct device_node *dp = op->dev.of_node; int err; - pbm->numa_node = -1; + pbm->numa_node = NUMA_NO_NODE; pbm->pci_ops = &sun4u_pci_ops; pbm->config_space_reg_bits = 12; diff --git a/arch/sparc/kernel/pci_schizo.c b/arch/sparc/kernel/pci_schizo.c index 934b97c72f7c..421aba00e6b0 100644 --- a/arch/sparc/kernel/pci_schizo.c +++ b/arch/sparc/kernel/pci_schizo.c @@ -12,6 +12,7 @@ #include #include #include +#include 
#include #include @@ -1347,7 +1348,7 @@ static int schizo_pbm_init(struct pci_pbm_info *pbm, pbm->next = pci_pbm_root; pci_pbm_root = pbm; - pbm->numa_node = -1; + pbm->numa_node = NUMA_NO_NODE; pbm->pci_ops = &sun4u_pci_ops; pbm->config_space_reg_bits = 8; diff --git a/arch/sparc/kernel/psycho_common.c b/arch/sparc/kernel/psycho_common.c index 81aa91e5c0e6..e90bcb6bad7f 100644 --- a/arch/sparc/kernel/psycho_common.c +++ b/arch/sparc/kernel/psycho_common.c @@ -5,6 +5,7 @@ */ #include #include +#include #include @@ -454,7 +455,7 @@ void psycho_pbm_init_common(struct pci_pbm_info *pbm, struct platform_device *op struct device_node *dp = op->dev.of_node; pbm->name = dp->full_name; - pbm->numa_node = -1; + pbm->numa_node = NUMA_NO_NODE; pbm->chip_type = chip_type; pbm->chip_version = of_getintprop_default(dp, "version#", 0); pbm->chip_revision = of_getintprop_default(dp, "module-revision#", 0); diff --git a/arch/sparc/kernel/sbus.c b/arch/sparc/kernel/sbus.c index 41c5deb581b8..32141e1006c4 100644 --- a/arch/sparc/kernel/sbus.c +++ b/arch/sparc/kernel/sbus.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -561,7 +562,7 @@ static void __init sbus_iommu_init(struct platform_device *op) op->dev.archdata.iommu = iommu; op->dev.archdata.stc = strbuf; - op->dev.archdata.numa_node = -1; + op->dev.archdata.numa_node = NUMA_NO_NODE; reg_base = regs + SYSIO_IOMMUREG_BASE; iommu->iommu_control = reg_base + IOMMU_CONTROL; diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index b4221d3727d0..9e6bd868ba6f 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -976,13 +976,13 @@ static u64 __init memblock_nid_range_sun4u(u64 start, u64 end, int *nid) { int prev_nid, new_nid; - prev_nid = -1; + prev_nid = NUMA_NO_NODE; for ( ; start < end; start += PAGE_SIZE) { for (new_nid = 0; new_nid < num_node_masks; new_nid++) { struct node_mem_mask *p = &node_masks[new_nid]; if ((start & p->mask) == p->match) { - if (prev_nid == -1) + if 
(prev_nid == NUMA_NO_NODE) prev_nid = new_nid; break; } @@ -1208,7 +1208,7 @@ int of_node_to_nid(struct device_node *dp) md = mdesc_grab(); count = 0; - nid = -1; + nid = NUMA_NO_NODE; mdesc_for_each_node_by_name(md, grp, "group") { if (!scan_arcs_for_cfg_handle(md, grp, cfg_handle)) { nid = count; diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 662963681ea6..e662f987dfa2 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -141,7 +142,7 @@ cpumask_of_pcibus(const struct pci_bus *bus) int node; node = __pcibus_to_node(bus); - return (node == -1) ? cpu_online_mask : + return (node == NUMA_NO_NODE) ? cpu_online_mask : cpumask_of_node(node); } #endif diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index a555da094157..1e225528f0d7 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -1390,7 +1391,7 @@ static void __init build_socket_tables(void) } /* Set socket -> node values: */ - lnid = -1; + lnid = NUMA_NO_NODE; for_each_present_cpu(cpu) { int nid = cpu_to_node(cpu); int apicid, sockid; @@ -1521,7 +1522,7 @@ static void __init uv_system_init_hub(void) new_hub->pnode = 0xffff; new_hub->numa_blade_id = uv_node_to_blade_id(nodeid); - new_hub->memory_nid = -1; + new_hub->memory_nid = NUMA_NO_NODE; new_hub->nr_possible_cpus = 0; new_hub->nr_online_cpus = 0; } @@ -1538,7 +1539,7 @@ static void __init uv_system_init_hub(void) uv_cpu_info_per(cpu)->p_uv_hub_info = uv_hub_info_list(nodeid); uv_cpu_info_per(cpu)->blade_cpu_id = uv_cpu_hub_info(cpu)->nr_possible_cpus++; - if (uv_cpu_hub_info(cpu)->memory_nid == -1) + if (uv_cpu_hub_info(cpu)->memory_nid == NUMA_NO_NODE) uv_cpu_hub_info(cpu)->memory_nid = cpu_to_node(cpu); /* Init memoryless node: */ diff --git a/arch/x86/kernel/smpboot.c 
b/arch/x86/kernel/smpboot.c index ccd1f2a8e557..c91ff9f9fe8a 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include @@ -841,7 +842,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) /* reduce the number of lines printed when booting a large cpu count system */ static void announce_cpu(int cpu, int apicid) { - static int current_node = -1; + static int current_node = NUMA_NO_NODE; int node = early_cpu_to_node(cpu); static int width, node_width; diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 88e8440e75c3..2f3ee4d6af82 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "mtip32xx.h" #define HW_CMD_SLOT_SZ (MTIP_MAX_COMMAND_SLOTS * 32) @@ -4018,9 +4019,9 @@ static int get_least_used_cpu_on_node(int node) /* Helper for selecting a node in round robin mode */ static inline int mtip_get_next_rr_node(void) { - static int next_node = -1; + static int next_node = NUMA_NO_NODE; - if (next_node == -1) { + if (next_node == NUMA_NO_NODE) { next_node = first_online_node; return next_node; } diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index f1a441ab395d..3a11b1092e80 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -63,6 +63,7 @@ #include #include #include +#include static DEFINE_MUTEX(dma_list_mutex); static DEFINE_IDA(dma_ida); @@ -386,7 +387,8 @@ EXPORT_SYMBOL(dma_issue_pending_all); static bool dma_chan_is_local(struct dma_chan *chan, int cpu) { int node = dev_to_node(chan->device->dev); - return node == -1 || cpumask_test_cpu(cpu, cpumask_of_node(node)); + return node == NUMA_NO_NODE || + cpumask_test_cpu(cpu, cpumask_of_node(node)); } /** diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 2baf38cc1e23..4fe662c3bbc1 100644 --- 
a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -48,6 +48,7 @@ #include #include #include +#include #include "hfi.h" #include "affinity.h" @@ -777,7 +778,7 @@ void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd) _dev_comp_vect_cpu_mask_clean_up(dd, entry); unlock: mutex_unlock(&node_affinity.lock); - dd->node = -1; + dd->node = NUMA_NO_NODE; } /* diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 7835eb52e7c5..441b06e2a154 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include "hfi.h" @@ -1303,7 +1304,7 @@ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, dd->unit = ret; list_add(&dd->list, &hfi1_dev_list); } - dd->node = -1; + dd->node = NUMA_NO_NODE; spin_unlock_irqrestore(&hfi1_devs_lock, flags); idr_preload_end(); diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 58dc70bffd5b..9c49300e9fb7 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -477,7 +478,7 @@ static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg) int node = acpi_map_pxm_to_node(rhsa->proximity_domain); if (!node_online(node)) - node = -1; + node = NUMA_NO_NODE; drhd->iommu->node = node; return 0; } @@ -1062,7 +1063,7 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) iommu->msagaw = msagaw; iommu->segment = drhd->segment; - iommu->node = -1; + iommu->node = NUMA_NO_NODE; ver = readl(iommu->reg + DMAR_VER_REG); pr_info("%s: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n", diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 78188bf7e90d..39a33dec4d0b 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -1716,7 +1717,7 @@ static struct 
dmar_domain *alloc_domain(int flags) return NULL; memset(domain, 0, sizeof(*domain)); - domain->nid = -1; + domain->nid = NUMA_NO_NODE; domain->flags = flags; domain->has_iotlb_device = false; INIT_LIST_HEAD(&domain->devices); diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c index 0441abe87880..9e443df44b3b 100644 --- a/drivers/misc/sgi-xp/xpc_uv.c +++ b/drivers/misc/sgi-xp/xpc_uv.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #if defined CONFIG_X86_64 #include @@ -61,7 +62,7 @@ static struct xpc_heartbeat_uv *xpc_heartbeat_uv; XPC_NOTIFY_MSG_SIZE_UV) #define XPC_NOTIFY_IRQ_NAME "xpc_notify" -static int xpc_mq_node = -1; +static int xpc_mq_node = NUMA_NO_NODE; static struct xpc_gru_mq_uv *xpc_activate_mq_uv; static struct xpc_gru_mq_uv *xpc_notify_mq_uv; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index a4e7584a50cb..e100054a3765 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -6418,7 +6419,7 @@ int ixgbe_setup_tx_resources(struct ixgbe_ring *tx_ring) { struct device *dev = tx_ring->dev; int orig_node = dev_to_node(dev); - int ring_node = -1; + int ring_node = NUMA_NO_NODE; int size; size = sizeof(struct ixgbe_tx_buffer) * tx_ring->count; @@ -6512,7 +6513,7 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter, { struct device *dev = rx_ring->dev; int orig_node = dev_to_node(dev); - int ring_node = -1; + int ring_node = NUMA_NO_NODE; int size; size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count; diff --git a/include/linux/device.h b/include/linux/device.h index 6cb4640b6160..4d2f13e8c540 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1095,7 +1095,7 @@ static inline void set_dev_node(struct device *dev, int node) #else static inline int dev_to_node(struct device *dev) { - return -1; + 
return NUMA_NO_NODE; } static inline void set_dev_node(struct device *dev, int node) { diff --git a/init/init_task.c b/init/init_task.c index 5aebe3be4d7c..26131e73aa6d 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -154,7 +155,7 @@ struct task_struct init_task .vtime.state = VTIME_SYS, #endif #ifdef CONFIG_NUMA_BALANCING - .numa_preferred_nid = -1, + .numa_preferred_nid = NUMA_NO_NODE, .numa_group = NULL, .numa_faults = NULL, #endif diff --git a/kernel/kthread.c b/kernel/kthread.c index 087d18d771b5..ebebbcf3c5de 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -20,6 +20,7 @@ #include #include #include +#include #include static DEFINE_SPINLOCK(kthread_create_lock); @@ -675,7 +676,7 @@ __kthread_create_worker(int cpu, unsigned int flags, { struct kthread_worker *worker; struct task_struct *task; - int node = -1; + int node = NUMA_NO_NODE; worker = kzalloc(sizeof(*worker), GFP_KERNEL); if (!worker) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 310d0637fe4b..0e6a0ef129c5 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1160,7 +1160,7 @@ void init_numa_balancing(unsigned long clone_flags, struct task_struct *p) /* New address space, reset the preferred nid */ if (!(clone_flags & CLONE_VM)) { - p->numa_preferred_nid = -1; + p->numa_preferred_nid = NUMA_NO_NODE; return; } @@ -1180,13 +1180,13 @@ void init_numa_balancing(unsigned long clone_flags, struct task_struct *p) static void account_numa_enqueue(struct rq *rq, struct task_struct *p) { - rq->nr_numa_running += (p->numa_preferred_nid != -1); + rq->nr_numa_running += (p->numa_preferred_nid != NUMA_NO_NODE); rq->nr_preferred_running += (p->numa_preferred_nid == task_node(p)); } static void account_numa_dequeue(struct rq *rq, struct task_struct *p) { - rq->nr_numa_running -= (p->numa_preferred_nid != -1); + rq->nr_numa_running -= (p->numa_preferred_nid != NUMA_NO_NODE); rq->nr_preferred_running -= 
(p->numa_preferred_nid == task_node(p)); } @@ -1400,7 +1400,7 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page, * two full passes of the "multi-stage node selection" test that is * executed below. */ - if ((p->numa_preferred_nid == -1 || p->numa_scan_seq <= 4) && + if ((p->numa_preferred_nid == NUMA_NO_NODE || p->numa_scan_seq <= 4) && (cpupid_pid_unset(last_cpupid) || cpupid_match_pid(p, last_cpupid))) return true; @@ -1848,7 +1848,7 @@ static void numa_migrate_preferred(struct task_struct *p) unsigned long interval = HZ; /* This task has no NUMA fault statistics yet */ - if (unlikely(p->numa_preferred_nid == -1 || !p->numa_faults)) + if (unlikely(p->numa_preferred_nid == NUMA_NO_NODE || !p->numa_faults)) return; /* Periodically retry migrating the task to the preferred node */ @@ -2095,7 +2095,7 @@ static int preferred_group_nid(struct task_struct *p, int nid) static void task_numa_placement(struct task_struct *p) { - int seq, nid, max_nid = -1; + int seq, nid, max_nid = NUMA_NO_NODE; unsigned long max_faults = 0; unsigned long fault_types[2] = { 0, 0 }; unsigned long total_faults; @@ -2638,7 +2638,8 @@ static void update_scan_period(struct task_struct *p, int new_cpu) * the preferred node. */ if (dst_nid == p->numa_preferred_nid || - (p->numa_preferred_nid != -1 && src_nid != p->numa_preferred_nid)) + (p->numa_preferred_nid != NUMA_NO_NODE && + src_nid != p->numa_preferred_nid)) return; } diff --git a/lib/cpumask.c b/lib/cpumask.c index 8d666ab84b5c..087a3e9a0202 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -5,6 +5,7 @@ #include #include #include +#include /** * cpumask_next - get the next cpu in a cpumask @@ -206,7 +207,7 @@ unsigned int cpumask_local_spread(unsigned int i, int node) /* Wrap: we always want a cpu. 
*/ i %= num_online_cpus(); - if (node == -1) { + if (node == NUMA_NO_NODE) { for_each_cpu(cpu, cpu_online_mask) if (i-- == 0) return cpu; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index faf357eaf0ce..d066f7ca1ee8 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -1475,7 +1476,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd) struct anon_vma *anon_vma = NULL; struct page *page; unsigned long haddr = vmf->address & HPAGE_PMD_MASK; - int page_nid = -1, this_nid = numa_node_id(); + int page_nid = NUMA_NO_NODE, this_nid = numa_node_id(); int target_nid, last_cpupid = -1; bool page_locked; bool migrated = false; @@ -1520,7 +1521,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd) */ page_locked = trylock_page(page); target_nid = mpol_misplaced(page, vma, haddr); - if (target_nid == -1) { + if (target_nid == NUMA_NO_NODE) { /* If the page was locked, there are no parallel migrations */ if (page_locked) goto clear_pmdnuma; @@ -1528,7 +1529,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd) /* Migration could have started since the pmd_trans_migrating check */ if (!page_locked) { - page_nid = -1; + page_nid = NUMA_NO_NODE; if (!get_page_unless_zero(page)) goto out_unlock; spin_unlock(vmf->ptl); @@ -1549,14 +1550,14 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd) if (unlikely(!pmd_same(pmd, *vmf->pmd))) { unlock_page(page); put_page(page); - page_nid = -1; + page_nid = NUMA_NO_NODE; goto out_unlock; } /* Bail if we fail to protect against THP splits for any reason */ if (unlikely(!anon_vma)) { put_page(page); - page_nid = -1; + page_nid = NUMA_NO_NODE; goto clear_pmdnuma; } @@ -1618,7 +1619,7 @@ out: if (anon_vma) page_unlock_anon_vma_read(anon_vma); - if (page_nid != -1) + if (page_nid != NUMA_NO_NODE) task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR, flags); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 
8dfdffc34a99..3c504fa6b460 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -887,7 +888,7 @@ static struct page *dequeue_huge_page_nodemask(struct hstate *h, gfp_t gfp_mask, struct zonelist *zonelist; struct zone *zone; struct zoneref *z; - int node = -1; + int node = NUMA_NO_NODE; zonelist = node_zonelist(nid, gfp_mask); diff --git a/mm/ksm.c b/mm/ksm.c index 6c48ad13b4c9..fd2db6a74d3c 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -598,7 +598,7 @@ static struct stable_node *alloc_stable_node_chain(struct stable_node *dup, chain->chain_prune_time = jiffies; chain->rmap_hlist_len = STABLE_NODE_CHAIN; #if defined (CONFIG_DEBUG_VM) && defined(CONFIG_NUMA) - chain->nid = -1; /* debug */ + chain->nid = NUMA_NO_NODE; /* debug */ #endif ksm_stable_node_chains++; diff --git a/mm/memory.c b/mm/memory.c index e11ca9dd823f..eb40f32295d2 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -69,6 +69,7 @@ #include #include #include +#include #include #include @@ -3586,7 +3587,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct page *page = NULL; - int page_nid = -1; + int page_nid = NUMA_NO_NODE; int last_cpupid; int target_nid; bool migrated = false; @@ -3653,7 +3654,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) target_nid = numa_migrate_prep(page, vma, vmf->address, page_nid, &flags); pte_unmap_unlock(vmf->pte, vmf->ptl); - if (target_nid == -1) { + if (target_nid == NUMA_NO_NODE) { put_page(page); goto out; } @@ -3667,7 +3668,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) flags |= TNF_MIGRATE_FAIL; out: - if (page_nid != -1) + if (page_nid != NUMA_NO_NODE) task_numa_fault(last_cpupid, page_nid, 1, flags); return 0; } diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 4f07c8ddfdd7..b3d3c64d15df 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -702,9 +702,9 @@ static void node_states_check_changes_online(unsigned long 
nr_pages, { int nid = zone_to_nid(zone); - arg->status_change_nid = -1; - arg->status_change_nid_normal = -1; - arg->status_change_nid_high = -1; + arg->status_change_nid = NUMA_NO_NODE; + arg->status_change_nid_normal = NUMA_NO_NODE; + arg->status_change_nid_high = NUMA_NO_NODE; if (!node_state(nid, N_MEMORY)) arg->status_change_nid = nid; @@ -1509,9 +1509,9 @@ static void node_states_check_changes_offline(unsigned long nr_pages, unsigned long present_pages = 0; enum zone_type zt; - arg->status_change_nid = -1; - arg->status_change_nid_normal = -1; - arg->status_change_nid_high = -1; + arg->status_change_nid = NUMA_NO_NODE; + arg->status_change_nid_normal = NUMA_NO_NODE; + arg->status_change_nid_high = NUMA_NO_NODE; /* * Check whether node_states[N_NORMAL_MEMORY] will be changed. diff --git a/mm/mempolicy.c b/mm/mempolicy.c index ee2bce59d2bf..76e7e4bc3335 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2304,7 +2304,7 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long unsigned long pgoff; int thiscpu = raw_smp_processor_id(); int thisnid = cpu_to_node(thiscpu); - int polnid = -1; + int polnid = NUMA_NO_NODE; int ret = -1; pol = get_vma_policy(vma, addr); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5361bd078493..1f9f1409df9b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6016,7 +6016,7 @@ int __meminit __early_pfn_to_nid(unsigned long pfn, return state->last_nid; nid = memblock_search_pfn_nid(pfn, &start_pfn, &end_pfn); - if (nid != -1) { + if (nid != NUMA_NO_NODE) { state->last_start = start_pfn; state->last_end = end_pfn; state->last_nid = nid; @@ -6771,7 +6771,7 @@ unsigned long __init node_map_pfn_alignment(void) { unsigned long accl_mask = 0, last_end = 0; unsigned long start, end, mask; - int last_nid = -1; + int last_nid = NUMA_NO_NODE; int i, nid; for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, &nid) { diff --git a/mm/page_ext.c b/mm/page_ext.c index 8c78b8d45117..762d5b7eb523 100644 --- 
a/mm/page_ext.c +++ b/mm/page_ext.c @@ -300,7 +300,7 @@ static int __meminit online_page_ext(unsigned long start_pfn, start = SECTION_ALIGN_DOWN(start_pfn); end = SECTION_ALIGN_UP(start_pfn + nr_pages); - if (nid == -1) { + if (nid == NUMA_NO_NODE) { /* * In this case, "nid" already exists and contains valid memory. * "start_pfn" passed to us is a pfn which is an arg for diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 6ac919847ce6..f3f5a78cd062 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -158,6 +158,7 @@ #include #include #include +#include #include #include #include @@ -3625,7 +3626,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) pkt_dev->svlan_cfi = 0; pkt_dev->svlan_id = 0xffff; pkt_dev->burst = 1; - pkt_dev->node = -1; + pkt_dev->node = NUMA_NO_NODE; err = pktgen_setup_dev(t->net, pkt_dev, ifname); if (err) diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 86e1e37eb4e8..b37e6e0a1026 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -15,6 +15,7 @@ #include #include #include /* For TIOCINQ/OUTQ */ +#include #include @@ -101,7 +102,7 @@ static inline struct qrtr_sock *qrtr_sk(struct sock *sk) return container_of(sk, struct qrtr_sock, sk); } -static unsigned int qrtr_local_nid = -1; +static unsigned int qrtr_local_nid = NUMA_NO_NODE; /* for node ids */ static RADIX_TREE(qrtr_nodes, GFP_KERNEL); -- cgit v1.2.3-59-g8ed1b From 3f21a6b7ef207892841feecc3b9216e1a29c745f Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Tue, 5 Mar 2019 15:43:34 -0800 Subject: vmalloc: add test driver to analyse vmalloc allocator This adds a new kernel module for analysis of vmalloc allocator. It is only enabled as a module. There are two main reasons this module should be used for: performance evaluation and stressing of vmalloc subsystem. It consists of several test cases. As of now there are 8. The module has five parameters we can specify to change its behaviour.
1) run_test_mask - set of tests to be run id: 1, name: fix_size_alloc_test id: 2, name: full_fit_alloc_test id: 4, name: long_busy_list_alloc_test id: 8, name: random_size_alloc_test id: 16, name: fix_align_alloc_test id: 32, name: random_size_align_alloc_test id: 64, name: align_shift_alloc_test id: 128, name: pcpu_alloc_test By default all tests are in the run test mask. If you want to select some specific tests, it is possible to pass the mask. For example, to run the first, second and fourth tests, pass the value 11 (1 + 2 + 8). 2) test_repeat_count - how many times each test should be repeated By default it is one time per test. Any positive number can be passed; the higher the value, the longer the test takes. 3) test_loop_count - internal test loop counter. By default it is set to 1000000. 4) single_cpu_test - use one CPU to run the tests By default this parameter is set to false. It means that all online CPUs execute tests. By setting it to 1, the tests are executed by the first online CPU only. 5) sequential_test_order - run tests in sequential order By default this parameter is set to false. It means that the test order is shuffled before the tests are run. To make it sequential, just set it to 1. Performance analysis: In order to evaluate the performance of vmalloc allocations, it usually makes sense to run the tests on only one CPU and in sequential order; the repeat count and the test mask can be varied as needed. For example, suppose we want to run all tests on one CPU and repeat each test 3 times.
Insert the module passing the following parameters: single_cpu_test=1 sequential_test_order=1 test_repeat_count=3 with the following output: Summary: fix_size_alloc_test passed: 3 failed: 0 repeat: 3 loops: 1000000 avg: 901177 usec Summary: full_fit_alloc_test passed: 3 failed: 0 repeat: 3 loops: 1000000 avg: 1039341 usec Summary: long_busy_list_alloc_test passed: 3 failed: 0 repeat: 3 loops: 1000000 avg: 11775763 usec Summary: random_size_alloc_test passed: 3 failed: 0 repeat: 3 loops: 1000000 avg: 6081992 usec Summary: fix_align_alloc_test passed: 3 failed: 0 repeat: 3 loops: 1000000 avg: 2003712 usec Summary: random_size_align_alloc_test passed: 3 failed: 0 repeat: 3 loops: 1000000 avg: 2895689 usec Summary: align_shift_alloc_test passed: 0 failed: 3 repeat: 3 loops: 1000000 avg: 573 usec Summary: pcpu_alloc_test passed: 3 failed: 0 repeat: 3 loops: 1000000 avg: 95802 usec All test took CPU0=192945605995 cycles The align_shift_alloc_test is expected to fail. Stressing: In order to stress the vmalloc subsystem we run all available test cases on all available CPUs simultaneously. In order to prevent a constant behaviour pattern, the test cases array is shuffled by default to randomize the order of test execution. For example, suppose we want to run all tests (default), use all online CPUs (default) with shuffled order (default) and repeat each test 30 times. The command would be: modprobe test_vmalloc test_repeat_count=30 The expected results are that the system stays alive, there are no BUG_ONs or kernel panics, the tests complete and there are no memory leaks.
[urezki@gmail.com: fix 32-bit builds] Link: http://lkml.kernel.org/r/20190106214839.ffvjvmrn52uqog7k@pc636 [urezki@gmail.com: make CONFIG_TEST_VMALLOC depend on CONFIG_MMU] Link: http://lkml.kernel.org/r/20190219085441.s6bg2gpy4esny5vw@pc636 Link: http://lkml.kernel.org/r/20190103142108.20744-3-urezki@gmail.com Signed-off-by: Uladzislau Rezki (Sony) Cc: Kees Cook Cc: Matthew Wilcox Cc: Michal Hocko Cc: Oleksiy Avramchenko Cc: Shuah Khan Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 13 ++ lib/Makefile | 1 + lib/test_vmalloc.c | 551 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 565 insertions(+) create mode 100644 lib/test_vmalloc.c (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index a219f3488ad7..48f584393e28 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1875,6 +1875,19 @@ config TEST_LKM If unsure, say N. +config TEST_VMALLOC + tristate "Test module for stress/performance analysis of vmalloc allocator" + default n + depends on MMU + depends on m + help + This builds the "test_vmalloc" module that should be used for + stress and performance analysis. So, any new change for vmalloc + subsystem can be evaluated from performance and stability point + of view. + + If unsure, say N. 
+ config TEST_USER_COPY tristate "Test user/kernel boundary protections" depends on m diff --git a/lib/Makefile b/lib/Makefile index e1b59da71418..cbfacd55aeca 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -60,6 +60,7 @@ UBSAN_SANITIZE_test_ubsan.o := y obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o obj-$(CONFIG_TEST_LIST_SORT) += test_list_sort.o obj-$(CONFIG_TEST_LKM) += test_module.o +obj-$(CONFIG_TEST_VMALLOC) += test_vmalloc.o obj-$(CONFIG_TEST_OVERFLOW) += test_overflow.o obj-$(CONFIG_TEST_RHASHTABLE) += test_rhashtable.o obj-$(CONFIG_TEST_SORT) += test_sort.o diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c new file mode 100644 index 000000000000..83cdcaa82bf6 --- /dev/null +++ b/lib/test_vmalloc.c @@ -0,0 +1,551 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Test module for stress and analyze performance of vmalloc allocator. + * (C) 2018 Uladzislau Rezki (Sony) + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define __param(type, name, init, msg) \ + static type name = init; \ + module_param(name, type, 0444); \ + MODULE_PARM_DESC(name, msg) \ + +__param(bool, single_cpu_test, false, + "Use single first online CPU to run tests"); + +__param(bool, sequential_test_order, false, + "Use sequential stress tests order"); + +__param(int, test_repeat_count, 1, + "Set test repeat counter"); + +__param(int, test_loop_count, 1000000, + "Set test loop counter"); + +__param(int, run_test_mask, INT_MAX, + "Set tests specified in the mask.\n\n" + "\t\tid: 1, name: fix_size_alloc_test\n" + "\t\tid: 2, name: full_fit_alloc_test\n" + "\t\tid: 4, name: long_busy_list_alloc_test\n" + "\t\tid: 8, name: random_size_alloc_test\n" + "\t\tid: 16, name: fix_align_alloc_test\n" + "\t\tid: 32, name: random_size_align_alloc_test\n" + "\t\tid: 64, name: align_shift_alloc_test\n" + "\t\tid: 128, name: pcpu_alloc_test\n" + /* Add a new test case description here. 
*/ +); + +/* + * Depends on single_cpu_test parameter. If it is true, then + * use first online CPU to trigger a test on, otherwise go with + * all online CPUs. + */ +static cpumask_t cpus_run_test_mask = CPU_MASK_NONE; + +/* + * Read write semaphore for synchronization of setup + * phase that is done in main thread and workers. + */ +static DECLARE_RWSEM(prepare_for_test_rwsem); + +/* + * Completion tracking for worker threads. + */ +static DECLARE_COMPLETION(test_all_done_comp); +static atomic_t test_n_undone = ATOMIC_INIT(0); + +static inline void +test_report_one_done(void) +{ + if (atomic_dec_and_test(&test_n_undone)) + complete(&test_all_done_comp); +} + +static int random_size_align_alloc_test(void) +{ + unsigned long size, align, rnd; + void *ptr; + int i; + + for (i = 0; i < test_loop_count; i++) { + get_random_bytes(&rnd, sizeof(rnd)); + + /* + * Maximum 1024 pages, if PAGE_SIZE is 4096. + */ + align = 1 << (rnd % 23); + + /* + * Maximum 10 pages. + */ + size = ((rnd % 10) + 1) * PAGE_SIZE; + + ptr = __vmalloc_node_range(size, align, + VMALLOC_START, VMALLOC_END, + GFP_KERNEL | __GFP_ZERO, + PAGE_KERNEL, + 0, 0, __builtin_return_address(0)); + + if (!ptr) + return -1; + + vfree(ptr); + } + + return 0; +} + +/* + * This test case is supposed to be failed. 
+ */ +static int align_shift_alloc_test(void) +{ + unsigned long align; + void *ptr; + int i; + + for (i = 0; i < BITS_PER_LONG; i++) { + align = ((unsigned long) 1) << i; + + ptr = __vmalloc_node_range(PAGE_SIZE, align, + VMALLOC_START, VMALLOC_END, + GFP_KERNEL | __GFP_ZERO, + PAGE_KERNEL, + 0, 0, __builtin_return_address(0)); + + if (!ptr) + return -1; + + vfree(ptr); + } + + return 0; +} + +static int fix_align_alloc_test(void) +{ + void *ptr; + int i; + + for (i = 0; i < test_loop_count; i++) { + ptr = __vmalloc_node_range(5 * PAGE_SIZE, + THREAD_ALIGN << 1, + VMALLOC_START, VMALLOC_END, + GFP_KERNEL | __GFP_ZERO, + PAGE_KERNEL, + 0, 0, __builtin_return_address(0)); + + if (!ptr) + return -1; + + vfree(ptr); + } + + return 0; +} + +static int random_size_alloc_test(void) +{ + unsigned int n; + void *p; + int i; + + for (i = 0; i < test_loop_count; i++) { + get_random_bytes(&n, sizeof(i)); + n = (n % 100) + 1; + + p = vmalloc(n * PAGE_SIZE); + + if (!p) + return -1; + + *((__u8 *)p) = 1; + vfree(p); + } + + return 0; +} + +static int long_busy_list_alloc_test(void) +{ + void *ptr_1, *ptr_2; + void **ptr; + int rv = -1; + int i; + + ptr = vmalloc(sizeof(void *) * 15000); + if (!ptr) + return rv; + + for (i = 0; i < 15000; i++) + ptr[i] = vmalloc(1 * PAGE_SIZE); + + for (i = 0; i < test_loop_count; i++) { + ptr_1 = vmalloc(100 * PAGE_SIZE); + if (!ptr_1) + goto leave; + + ptr_2 = vmalloc(1 * PAGE_SIZE); + if (!ptr_2) { + vfree(ptr_1); + goto leave; + } + + *((__u8 *)ptr_1) = 0; + *((__u8 *)ptr_2) = 1; + + vfree(ptr_1); + vfree(ptr_2); + } + + /* Success */ + rv = 0; + +leave: + for (i = 0; i < 15000; i++) + vfree(ptr[i]); + + vfree(ptr); + return rv; +} + +static int full_fit_alloc_test(void) +{ + void **ptr, **junk_ptr, *tmp; + int junk_length; + int rv = -1; + int i; + + junk_length = fls(num_online_cpus()); + junk_length *= (32 * 1024 * 1024 / PAGE_SIZE); + + ptr = vmalloc(sizeof(void *) * junk_length); + if (!ptr) + return rv; + + junk_ptr = 
vmalloc(sizeof(void *) * junk_length); + if (!junk_ptr) { + vfree(ptr); + return rv; + } + + for (i = 0; i < junk_length; i++) { + ptr[i] = vmalloc(1 * PAGE_SIZE); + junk_ptr[i] = vmalloc(1 * PAGE_SIZE); + } + + for (i = 0; i < junk_length; i++) + vfree(junk_ptr[i]); + + for (i = 0; i < test_loop_count; i++) { + tmp = vmalloc(1 * PAGE_SIZE); + + if (!tmp) + goto error; + + *((__u8 *)tmp) = 1; + vfree(tmp); + } + + /* Success */ + rv = 0; + +error: + for (i = 0; i < junk_length; i++) + vfree(ptr[i]); + + vfree(ptr); + vfree(junk_ptr); + + return rv; +} + +static int fix_size_alloc_test(void) +{ + void *ptr; + int i; + + for (i = 0; i < test_loop_count; i++) { + ptr = vmalloc(3 * PAGE_SIZE); + + if (!ptr) + return -1; + + *((__u8 *)ptr) = 0; + + vfree(ptr); + } + + return 0; +} + +static int +pcpu_alloc_test(void) +{ + int rv = 0; +#ifndef CONFIG_NEED_PER_CPU_KM + void __percpu **pcpu; + size_t size, align; + int i; + + pcpu = vmalloc(sizeof(void __percpu *) * 35000); + if (!pcpu) + return -1; + + for (i = 0; i < 35000; i++) { + unsigned int r; + + get_random_bytes(&r, sizeof(i)); + size = (r % (PAGE_SIZE / 4)) + 1; + + /* + * Maximum PAGE_SIZE + */ + get_random_bytes(&r, sizeof(i)); + align = 1 << ((i % 11) + 1); + + pcpu[i] = __alloc_percpu(size, align); + if (!pcpu[i]) + rv = -1; + } + + for (i = 0; i < 35000; i++) + free_percpu(pcpu[i]); + + vfree(pcpu); +#endif + return rv; +} + +struct test_case_desc { + const char *test_name; + int (*test_func)(void); +}; + +static struct test_case_desc test_case_array[] = { + { "fix_size_alloc_test", fix_size_alloc_test }, + { "full_fit_alloc_test", full_fit_alloc_test }, + { "long_busy_list_alloc_test", long_busy_list_alloc_test }, + { "random_size_alloc_test", random_size_alloc_test }, + { "fix_align_alloc_test", fix_align_alloc_test }, + { "random_size_align_alloc_test", random_size_align_alloc_test }, + { "align_shift_alloc_test", align_shift_alloc_test }, + { "pcpu_alloc_test", pcpu_alloc_test }, + /* Add a new test case 
here. */ +}; + +struct test_case_data { + int test_failed; + int test_passed; + u64 time; +}; + +/* Split it to get rid of: WARNING: line over 80 characters */ +static struct test_case_data + per_cpu_test_data[NR_CPUS][ARRAY_SIZE(test_case_array)]; + +static struct test_driver { + struct task_struct *task; + unsigned long start; + unsigned long stop; + int cpu; +} per_cpu_test_driver[NR_CPUS]; + +static void shuffle_array(int *arr, int n) +{ + unsigned int rnd; + int i, j, x; + + for (i = n - 1; i > 0; i--) { + get_random_bytes(&rnd, sizeof(rnd)); + + /* Cut the range. */ + j = rnd % i; + + /* Swap indexes. */ + x = arr[i]; + arr[i] = arr[j]; + arr[j] = x; + } +} + +static int test_func(void *private) +{ + struct test_driver *t = private; + cpumask_t newmask = CPU_MASK_NONE; + int random_array[ARRAY_SIZE(test_case_array)]; + int index, i, j, ret; + ktime_t kt; + u64 delta; + + cpumask_set_cpu(t->cpu, &newmask); + set_cpus_allowed_ptr(current, &newmask); + + for (i = 0; i < ARRAY_SIZE(test_case_array); i++) + random_array[i] = i; + + if (!sequential_test_order) + shuffle_array(random_array, ARRAY_SIZE(test_case_array)); + + /* + * Block until initialization is done. + */ + down_read(&prepare_for_test_rwsem); + + t->start = get_cycles(); + for (i = 0; i < ARRAY_SIZE(test_case_array); i++) { + index = random_array[i]; + + /* + * Skip tests if run_test_mask has been specified. + */ + if (!((run_test_mask & (1 << index)) >> index)) + continue; + + kt = ktime_get(); + for (j = 0; j < test_repeat_count; j++) { + ret = test_case_array[index].test_func(); + if (!ret) + per_cpu_test_data[t->cpu][index].test_passed++; + else + per_cpu_test_data[t->cpu][index].test_failed++; + } + + /* + * Take an average time that test took. 
+ */ + delta = (u64) ktime_us_delta(ktime_get(), kt); + do_div(delta, (u32) test_repeat_count); + + per_cpu_test_data[t->cpu][index].time = delta; + } + t->stop = get_cycles(); + + up_read(&prepare_for_test_rwsem); + test_report_one_done(); + + /* + * Wait for the kthread_stop() call. + */ + while (!kthread_should_stop()) + msleep(10); + + return 0; +} + +static void +init_test_configurtion(void) +{ + /* + * Reset all data of all CPUs. + */ + memset(per_cpu_test_data, 0, sizeof(per_cpu_test_data)); + + if (single_cpu_test) + cpumask_set_cpu(cpumask_first(cpu_online_mask), + &cpus_run_test_mask); + else + cpumask_and(&cpus_run_test_mask, cpu_online_mask, + cpu_online_mask); + + if (test_repeat_count <= 0) + test_repeat_count = 1; + + if (test_loop_count <= 0) + test_loop_count = 1; +} + +static void do_concurrent_test(void) +{ + int cpu, ret; + + /* + * Set some basic configurations plus sanity check. + */ + init_test_configurtion(); + + /* + * Put on hold all workers. + */ + down_write(&prepare_for_test_rwsem); + + for_each_cpu(cpu, &cpus_run_test_mask) { + struct test_driver *t = &per_cpu_test_driver[cpu]; + + t->cpu = cpu; + t->task = kthread_run(test_func, t, "vmalloc_test/%d", cpu); + + if (!IS_ERR(t->task)) + /* Success. */ + atomic_inc(&test_n_undone); + else + pr_err("Failed to start kthread for %d CPU\n", cpu); + } + + /* + * Now let the workers do their job. + */ + up_write(&prepare_for_test_rwsem); + + /* + * Sleep quiet until all workers are done with 1 second + * interval. Since the test can take a lot of time we + * can run into a stack trace of the hung task. That is + * why we go with completion_timeout and HZ value. 
+ */ + do { + ret = wait_for_completion_timeout(&test_all_done_comp, HZ); + } while (!ret); + + for_each_cpu(cpu, &cpus_run_test_mask) { + struct test_driver *t = &per_cpu_test_driver[cpu]; + int i; + + if (!IS_ERR(t->task)) + kthread_stop(t->task); + + for (i = 0; i < ARRAY_SIZE(test_case_array); i++) { + if (!((run_test_mask & (1 << i)) >> i)) + continue; + + pr_info( + "Summary: %s passed: %d failed: %d repeat: %d loops: %d avg: %llu usec\n", + test_case_array[i].test_name, + per_cpu_test_data[cpu][i].test_passed, + per_cpu_test_data[cpu][i].test_failed, + test_repeat_count, test_loop_count, + per_cpu_test_data[cpu][i].time); + } + + pr_info("All test took CPU%d=%lu cycles\n", + cpu, t->stop - t->start); + } +} + +static int vmalloc_test_init(void) +{ + do_concurrent_test(); + return -EAGAIN; /* Fail will directly unload the module */ +} + +static void vmalloc_test_exit(void) +{ +} + +module_init(vmalloc_test_init) +module_exit(vmalloc_test_exit) + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Uladzislau Rezki"); +MODULE_DESCRIPTION("vmalloc test module"); -- cgit v1.2.3-59-g8ed1b From 8aa49762dba3e8ce9a52a9b6da221e61a0c6de08 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 5 Mar 2019 15:46:19 -0800 Subject: mm/page_owner: move config option to mm/Kconfig.debug Move the PAGE_OWNER option from submenu "Compile-time checks and compiler options" to dedicated submenu "Memory Debugging". 
Link: http://lkml.kernel.org/r/20190120024254.6270-1-changbin.du@gmail.com Signed-off-by: Changbin Du Acked-by: Vlastimil Babka Cc: Masahiro Yamada Cc: Ingo Molnar Cc: Arnd Bergmann Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 17 ----------------- mm/Kconfig.debug | 17 +++++++++++++++++ 2 files changed, 17 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 48f584393e28..e6a7b01932e6 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -265,23 +265,6 @@ config UNUSED_SYMBOLS you really need it, and what the merge plan to the mainline kernel for your module is. -config PAGE_OWNER - bool "Track page owner" - depends on DEBUG_KERNEL && STACKTRACE_SUPPORT - select DEBUG_FS - select STACKTRACE - select STACKDEPOT - select PAGE_EXTENSION - help - This keeps track of what call chain is the owner of a page, may - help to find bare alloc_page(s) leaks. Even if you include this - feature on your build, it is disabled in default. You should pass - "page_owner=on" to boot parameter in order to enable it. Eats - a fair amount of memory if enabled. See tools/vm/page_owner_sort.c - for user-space helper. - - If unsure, say N. - config DEBUG_FS bool "Debug Filesystem" help diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug index 9a7b8b049d04..e3df921208c0 100644 --- a/mm/Kconfig.debug +++ b/mm/Kconfig.debug @@ -39,6 +39,23 @@ config DEBUG_PAGEALLOC_ENABLE_DEFAULT Enable debug page memory allocations by default? This value can be overridden by debug_pagealloc=off|on. +config PAGE_OWNER + bool "Track page owner" + depends on DEBUG_KERNEL && STACKTRACE_SUPPORT + select DEBUG_FS + select STACKTRACE + select STACKDEPOT + select PAGE_EXTENSION + help + This keeps track of what call chain is the owner of a page, may + help to find bare alloc_page(s) leaks. Even if you include this + feature on your build, it is disabled in default. 
You should pass + "page_owner=on" to boot parameter in order to enable it. Eats + a fair amount of memory if enabled. See tools/vm/page_owner_sort.c + for user-space helper. + + If unsure, say N. + config PAGE_POISONING bool "Poison pages after freeing" select PAGE_POISONING_NO_SANITY if HIBERNATION -- cgit v1.2.3-59-g8ed1b