From 6b74ab97bc12ce74acec900f1d89a4aee2e4d70d Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 23 Jul 2008 21:26:49 -0700 Subject: mm: add a basic debugging framework for memory initialisation Boot initialisation is very complex, with significant numbers of architecture-specific routines, hooks and code ordering. While significant amounts of the initialisation is architecture-independent, it trusts the data received from the architecture layer. This is a mistake, and has resulted in a number of difficult-to-diagnose bugs. This patchset adds some validation and tracing to memory initialisation. It also introduces a few basic defensive measures. The validation code can be explicitly disabled for embedded systems. This patch: Add additional debugging and verification code for memory initialisation. Once enabled, the verification checks are always run and when required additional debugging information may be outputted via a mminit_loglevel= command-line parameter. The verification code is placed in a new file mm/mm_init.c. Ideally other mm initialisation code will be moved here over time. Signed-off-by: Mel Gorman Cc: Christoph Lameter Cc: Andy Whitcroft Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mm_init.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 mm/mm_init.c (limited to 'mm/mm_init.c') diff --git a/mm/mm_init.c b/mm/mm_init.c new file mode 100644 index 000000000000..c01d8dfec817 --- /dev/null +++ b/mm/mm_init.c @@ -0,0 +1,18 @@ +/* + * mm_init.c - Memory initialisation verification and debugging + * + * Copyright 2008 IBM Corporation, 2008 + * Author Mel Gorman + * + */ +#include +#include + +int __meminitdata mminit_loglevel; + +static __init int set_mminit_loglevel(char *str) +{ + get_option(&str, &mminit_loglevel); + return 0; +} +early_param("mminit_loglevel", set_mminit_loglevel); -- cgit v1.2.3-59-g8ed1b From 708614e6180f398cd307ea0048d48ba6fa274610 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 23 Jul 2008 21:26:51 -0700 Subject: mm: verify the page links and memory model Print out information on how the page flags are being used if mminit_loglevel is MMINIT_VERIFY or higher and unconditionally performs sanity checks on the flags regardless of loglevel. When the page flags are updated with section, node and zone information, a check are made to ensure the values can be retrieved correctly. Finally we confirm that pfn_to_page and page_to_pfn are the correct inverse functions. [akpm@linux-foundation.org: fix printk warnings] Signed-off-by: Mel Gorman Cc: Christoph Lameter Cc: Andy Whitcroft Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/internal.h | 12 ++++++++++ mm/mm_init.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ mm/page_alloc.c | 8 +++++++ 3 files changed, 91 insertions(+) (limited to 'mm/mm_init.c') diff --git a/mm/internal.h b/mm/internal.h index a7ee05253294..7a4a2885dc8e 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -78,6 +78,10 @@ do { \ } \ } while (0) +extern void mminit_verify_pageflags_layout(void); +extern void mminit_verify_page_links(struct page *page, + enum zone_type zone, unsigned long nid, unsigned long pfn); + #else static inline void mminit_dprintk(enum mminit_level level, @@ -85,5 +89,13 @@ static inline void mminit_dprintk(enum mminit_level level, { } +static inline void mminit_verify_pageflags_layout(void) +{ +} + +static inline void mminit_verify_page_links(struct page *page, + enum zone_type zone, unsigned long nid, unsigned long pfn) +{ +} #endif /* CONFIG_DEBUG_MEMORY_INIT */ #endif diff --git a/mm/mm_init.c b/mm/mm_init.c index c01d8dfec817..e16990d629e6 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -7,9 +7,80 @@ */ #include #include +#include "internal.h" int __meminitdata mminit_loglevel; +void __init mminit_verify_pageflags_layout(void) +{ + int shift, width; + unsigned long or_mask, add_mask; + + shift = 8 * sizeof(unsigned long); + width = shift - SECTIONS_WIDTH - NODES_WIDTH - ZONES_WIDTH; + mminit_dprintk(MMINIT_TRACE, "pageflags_layout_widths", + "Section %d Node %d Zone %d Flags %d\n", + SECTIONS_WIDTH, + NODES_WIDTH, + ZONES_WIDTH, + NR_PAGEFLAGS); + mminit_dprintk(MMINIT_TRACE, "pageflags_layout_shifts", + "Section %d Node %d Zone %d\n", +#ifdef SECTIONS_SHIFT + SECTIONS_SHIFT, +#else + 0, +#endif + NODES_SHIFT, + ZONES_SHIFT); + mminit_dprintk(MMINIT_TRACE, "pageflags_layout_offsets", + "Section %lu Node %lu Zone %lu\n", + (unsigned long)SECTIONS_PGSHIFT, + (unsigned long)NODES_PGSHIFT, + (unsigned long)ZONES_PGSHIFT); + mminit_dprintk(MMINIT_TRACE, "pageflags_layout_zoneid", + "Zone ID: %lu -> %lu\n", + (unsigned long)ZONEID_PGOFF, + (unsigned long)(ZONEID_PGOFF + ZONEID_SHIFT)); + mminit_dprintk(MMINIT_TRACE, "pageflags_layout_usage", + "location: %d -> %d unused %d -> %d flags %d -> %d\n", + shift, width, width, NR_PAGEFLAGS, NR_PAGEFLAGS, 0); +#ifdef NODE_NOT_IN_PAGE_FLAGS + mminit_dprintk(MMINIT_TRACE, "pageflags_layout_nodeflags", + "Node not in page flags"); +#endif + + if (SECTIONS_WIDTH) { + shift -= SECTIONS_WIDTH; + BUG_ON(shift != SECTIONS_PGSHIFT); + } + if (NODES_WIDTH) { + shift -= NODES_WIDTH; + BUG_ON(shift != NODES_PGSHIFT); + } + if (ZONES_WIDTH) { + shift -= ZONES_WIDTH; + BUG_ON(shift != ZONES_PGSHIFT); + } + + /* Check for bitmask overlaps */ + or_mask = (ZONES_MASK << ZONES_PGSHIFT) | + (NODES_MASK << NODES_PGSHIFT) | + (SECTIONS_MASK << SECTIONS_PGSHIFT); + add_mask = (ZONES_MASK << ZONES_PGSHIFT) + + (NODES_MASK << NODES_PGSHIFT) + + (SECTIONS_MASK << SECTIONS_PGSHIFT); + BUG_ON(or_mask != add_mask); +} + +void __meminit mminit_verify_page_links(struct page *page, enum zone_type zone, + unsigned long nid, unsigned long pfn) +{ + BUG_ON(page_to_nid(page) != nid); + BUG_ON(page_zonenum(page) != zone); + BUG_ON(page_to_pfn(page) != pfn); +} + static __init int set_mminit_loglevel(char *str) { get_option(&str, &mminit_loglevel); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 0908352ba727..acab6ad326df 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2534,6 +2534,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, } page = pfn_to_page(pfn); set_page_links(page, zone, nid, pfn); + mminit_verify_page_links(page, zone, nid, pfn); init_page_count(page); reset_page_mapcount(page); SetPageReserved(page); @@ -2836,6 +2837,12 @@ __meminit int init_currently_empty_zone(struct zone *zone, zone->zone_start_pfn = zone_start_pfn; + mminit_dprintk(MMINIT_TRACE, "memmap_init", + "Initialising map node %d zone %lu pfns %lu -> %lu\n", + pgdat->node_id, + (unsigned long)zone_idx(zone), + zone_start_pfn, (zone_start_pfn + size)); + zone_init_free_lists(zone); return 0; @@ -3961,6 +3968,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) early_node_map[i].end_pfn); /* Initialise every node */ + mminit_verify_pageflags_layout(); setup_nr_node_ids(); for_each_online_node(nid) { pg_data_t *pgdat = NODE_DATA(nid); -- cgit v1.2.3-59-g8ed1b From 68ad8df42e12037c3894c9706ab428bf5cd6426b Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 23 Jul 2008 21:26:52 -0700 Subject: mm: print out the zonelists on request for manual verification This patch prints out the zonelists during boot for manual verification by the user if the mminit_loglevel is MMINIT_VERIFY or higher. Signed-off-by: Mel Gorman Cc: Christoph Lameter Cc: Andy Whitcroft Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/internal.h | 5 +++++ mm/mm_init.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ mm/page_alloc.c | 1 + 3 files changed, 51 insertions(+) (limited to 'mm/mm_init.c') diff --git a/mm/internal.h b/mm/internal.h index 5d17f3efac41..50807e12490e 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -81,6 +81,7 @@ do { \ extern void mminit_verify_pageflags_layout(void); extern void mminit_verify_page_links(struct page *page, enum zone_type zone, unsigned long nid, unsigned long pfn); +extern void mminit_verify_zonelist(void); #else @@ -97,6 +98,10 @@ static inline void mminit_verify_page_links(struct page *page, enum zone_type zone, unsigned long nid, unsigned long pfn) { } + +static inline void mminit_verify_zonelist(void) +{ +} #endif /* CONFIG_DEBUG_MEMORY_INIT */ /* mminit_validate_memmodel_limits is independent of CONFIG_DEBUG_MEMORY_INIT */ diff --git a/mm/mm_init.c b/mm/mm_init.c index e16990d629e6..ce445ca097e7 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -11,6 +11,51 @@ int __meminitdata mminit_loglevel; +/* The zonelists are simply reported, validation is manual. */ +void mminit_verify_zonelist(void) +{ + int nid; + + if (mminit_loglevel < MMINIT_VERIFY) + return; + + for_each_online_node(nid) { + pg_data_t *pgdat = NODE_DATA(nid); + struct zone *zone; + struct zoneref *z; + struct zonelist *zonelist; + int i, listid, zoneid; + + BUG_ON(MAX_ZONELISTS > 2); + for (i = 0; i < MAX_ZONELISTS * MAX_NR_ZONES; i++) { + + /* Identify the zone and nodelist */ + zoneid = i % MAX_NR_ZONES; + listid = i / MAX_NR_ZONES; + zonelist = &pgdat->node_zonelists[listid]; + zone = &pgdat->node_zones[zoneid]; + if (!populated_zone(zone)) + continue; + + /* Print information about the zonelist */ + printk(KERN_DEBUG "mminit::zonelist %s %d:%s = ", + listid > 0 ? "thisnode" : "general", nid, + zone->name); + + /* Iterate the zonelist */ + for_each_zone_zonelist(zone, z, zonelist, zoneid) { +#ifdef CONFIG_NUMA + printk(KERN_CONT "%d:%s ", + zone->node, zone->name); +#else + printk(KERN_CONT "0:%s ", zone->name); +#endif /* CONFIG_NUMA */ + } + printk(KERN_CONT "\n"); + } + } +} + void __init mminit_verify_pageflags_layout(void) { int shift, width; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 0adb66e711e6..9ece07ce65b0 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2352,6 +2352,7 @@ void build_all_zonelists(void) if (system_state == SYSTEM_BOOTING) { __build_all_zonelists(NULL); + mminit_verify_zonelist(); cpuset_init_current_mems_allowed(); } else { /* we have to stop all cpus to guarantee there is no user -- cgit v1.2.3-59-g8ed1b From 5e9426abe209cf134adbbd62c5e73ef534eb73e9 Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Wed, 23 Jul 2008 21:27:39 -0700 Subject: mm: remove mm_init compilation dependency on CONFIG_DEBUG_MEMORY_INIT Towards the end of putting all core mm initialization in mm_init.c, I plan on putting the creation of a mm kobject in a function in that file. However, the file is currently only compiled if CONFIG_DEBUG_MEMORY_INIT is set. Remove this dependency, but put the code under an #ifdef on the same config option. This should result in no functional changes. Signed-off-by: Nishanth Aravamudan Cc: Nick Piggin Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/Makefile | 3 +-- mm/mm_init.c | 2 ++ 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'mm/mm_init.c') diff --git a/mm/Makefile b/mm/Makefile index 4bbc8f094ff0..06ca2381fef1 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -11,7 +11,7 @@ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ maccess.o page_alloc.o page-writeback.o pdflush.o \ readahead.o swap.o truncate.o vmscan.o \ prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ - page_isolation.o $(mmu-y) + page_isolation.o mm_init.o $(mmu-y) obj-$(CONFIG_PROC_PAGE_MONITOR) += pagewalk.o obj-$(CONFIG_BOUNCE) += bounce.o @@ -26,7 +26,6 @@ obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o obj-$(CONFIG_SLOB) += slob.o obj-$(CONFIG_SLAB) += slab.o -obj-$(CONFIG_DEBUG_MEMORY_INIT) += mm_init.o obj-$(CONFIG_SLUB) += slub.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o obj-$(CONFIG_FS_XIP) += filemap_xip.o diff --git a/mm/mm_init.c b/mm/mm_init.c index ce445ca097e7..eaf0d3b47099 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -9,6 +9,7 @@ #include #include "internal.h" +#ifdef CONFIG_DEBUG_MEMORY_INIT int __meminitdata mminit_loglevel; /* The zonelists are simply reported, validation is manual. */ @@ -132,3 +133,4 @@ static __init int set_mminit_loglevel(char *str) return 0; } early_param("mminit_loglevel", set_mminit_loglevel); +#endif /* CONFIG_DEBUG_MEMORY_INIT */ -- cgit v1.2.3-59-g8ed1b From ff7ea79cf7c3a481851bd4b2185fdeb6ce4afa29 Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Wed, 23 Jul 2008 21:27:39 -0700 Subject: mm: create /sys/kernel/mm Add a kobject to create /sys/kernel/mm when sysfs is mounted. The kobject will exist regardless. This will allow for the hugepage related sysfs directories to exist under the mm "subsystem" directory. Add an ABI file appropriately. [kosaki.motohiro@jp.fujitsu.com: fix build] Signed-off-by: Nishanth Aravamudan Cc: Nick Piggin Cc: Mel Gorman Signed-off-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/ABI/testing/sysfs-kernel-mm | 6 ++++++ include/linux/kobject.h | 2 ++ mm/mm_init.c | 16 ++++++++++++++++ 3 files changed, 24 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-kernel-mm (limited to 'mm/mm_init.c') diff --git a/Documentation/ABI/testing/sysfs-kernel-mm b/Documentation/ABI/testing/sysfs-kernel-mm new file mode 100644 index 000000000000..190d523ac159 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-mm @@ -0,0 +1,6 @@ +What: /sys/kernel/mm +Date: July 2008 +Contact: Nishanth Aravamudan , VM maintainers +Description: + /sys/kernel/mm/ should contain any and all VM + related information in /sys/kernel/. diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 60f0d418ae32..5437ac0276e2 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -186,6 +186,8 @@ extern struct kobject *kset_find_obj(struct kset *, const char *); /* The global /sys/kernel/ kobject for people to chain off of */ extern struct kobject *kernel_kobj; +/* The global /sys/kernel/mm/ kobject for people to chain off of */ +extern struct kobject *mm_kobj; /* The global /sys/hypervisor/ kobject for people to chain off of */ extern struct kobject *hypervisor_kobj; /* The global /sys/power/ kobject for people to chain off of */ diff --git a/mm/mm_init.c b/mm/mm_init.c index eaf0d3b47099..c6af41ea9994 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -7,6 +7,8 @@ */ #include #include +#include +#include #include "internal.h" #ifdef CONFIG_DEBUG_MEMORY_INIT @@ -134,3 +136,17 @@ static __init int set_mminit_loglevel(char *str) } early_param("mminit_loglevel", set_mminit_loglevel); #endif /* CONFIG_DEBUG_MEMORY_INIT */ + +struct kobject *mm_kobj; +EXPORT_SYMBOL_GPL(mm_kobj); + +static int __init mm_sysfs_init(void) +{ + mm_kobj = kobject_create_and_add("mm", kernel_kobj); + if (!mm_kobj) + return -ENOMEM; + + return 0; +} + +__initcall(mm_sysfs_init); -- cgit v1.2.3-59-g8ed1b From 5c9ffc9c3d61dfcafd7cdb61c7b94f2d7ac408fb Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 5 Aug 2008 13:01:01 -0700 Subject: mm_init.c: avoid ifdef-inside-macro-expansion gcc-3.2: mm/mm_init.c:77:1: directives may not be used inside a macro argument mm/mm_init.c:76:47: unterminated argument list invoking macro "mminit_dprintk" mm/mm_init.c: In function `mminit_verify_pageflags_layout': mm/mm_init.c:80: `mminit_dprintk' undeclared (first use in this function) mm/mm_init.c:80: (Each undeclared identifier is reported only once mm/mm_init.c:80: for each function it appears in.) mm/mm_init.c:80: syntax error before numeric constant Also fix a typo in a comment. Reported-by: Adrian Bunk Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mm_init.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'mm/mm_init.c') diff --git a/mm/mm_init.c b/mm/mm_init.c index c6af41ea9994..936ef2efd892 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -14,6 +14,10 @@ #ifdef CONFIG_DEBUG_MEMORY_INIT int __meminitdata mminit_loglevel; +#ifndef SECTIONS_SHIFT +#define SECTIONS_SHIFT 0 +#endif + /* The zonelists are simply reported, validation is manual. */ void mminit_verify_zonelist(void) { @@ -74,11 +78,7 @@ void __init mminit_verify_pageflags_layout(void) NR_PAGEFLAGS); mminit_dprintk(MMINIT_TRACE, "pageflags_layout_shifts", "Section %d Node %d Zone %d\n", -#ifdef SECTIONS_SHIFT SECTIONS_SHIFT, -#else - 0, -#endif NODES_SHIFT, ZONES_SHIFT); mminit_dprintk(MMINIT_TRACE, "pageflags_layout_offsets", -- cgit v1.2.3-59-g8ed1b