From fa28e067c3b8af96c79c060e163b1387c172ae75 Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Sat, 9 Feb 2008 23:24:08 +0100 Subject: x86: GEODE: MFGPT: drop module owner usage from MFGPT API We had planned to use the 'owner' field for allowing re-allocation of MFGPTs; however, doing it by module owner name isn't flexible enough. So, drop this for now. If it turns out that we need timers in modules, we'll need to come up with a scheme that matches the write-once fields of the MFGPTx_SETUP register, and drops ponies from the sky. Signed-off-by: Andres Salomon Signed-off-by: Jordan Crouse Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/asm-x86/geode.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-x86/geode.h b/include/asm-x86/geode.h index 811fe14f70b2..c4482753a358 100644 --- a/include/asm-x86/geode.h +++ b/include/asm-x86/geode.h @@ -209,7 +209,7 @@ static inline u16 geode_mfgpt_read(int timer, u16 reg) extern int __init geode_mfgpt_detect(void); extern int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable); extern int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable); -extern int geode_mfgpt_alloc_timer(int timer, int domain, struct module *owner); +extern int geode_mfgpt_alloc_timer(int timer, int domain); #define geode_mfgpt_setup_irq(t, c, i) geode_mfgpt_set_irq((t), (c), (i), 1) #define geode_mfgpt_release_irq(t, c, i) geode_mfgpt_set_irq((t), (c), (i), 0) -- cgit v1.2.3-59-g8ed1b From b0e6bf2571e9385335e6337bdedb85cb629ab3fb Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Sat, 9 Feb 2008 23:24:08 +0100 Subject: x86: GEODE: MFGPT: make mfgpt_timer_setup available outside of mfgpt_32.c We need to be called from elsewhere, and this gets some #ifdefs out of the .c file. Signed-off-by: Andres Salomon Signed-off-by: Jordan Crouse Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/mfgpt_32.c | 8 +------- include/asm-x86/geode.h | 6 ++++++ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c index 6f79061cf119..5cf3a839530c 100644 --- a/arch/x86/kernel/mfgpt_32.c +++ b/arch/x86/kernel/mfgpt_32.c @@ -43,12 +43,6 @@ static struct mfgpt_timer_t { #define MFGPT_HZ (32768 / MFGPT_DIVISOR) #define MFGPT_PERIODIC (MFGPT_HZ / HZ) -#ifdef CONFIG_GEODE_MFGPT_TIMER -static int __init mfgpt_timer_setup(void); -#else -#define mfgpt_timer_setup() (0) -#endif - /* Allow for disabling of MFGPTs */ static int disable; static int __init mfgpt_disable(char *s) @@ -314,7 +308,7 @@ static struct irqaction mfgptirq = { .name = "mfgpt-timer" }; -static int __init mfgpt_timer_setup(void) +int __init mfgpt_timer_setup(void) { int timer, ret; u16 val; diff --git a/include/asm-x86/geode.h b/include/asm-x86/geode.h index c4482753a358..c13630655d62 100644 --- a/include/asm-x86/geode.h +++ b/include/asm-x86/geode.h @@ -214,4 +214,10 @@ extern int geode_mfgpt_alloc_timer(int timer, int domain); #define geode_mfgpt_setup_irq(t, c, i) geode_mfgpt_set_irq((t), (c), (i), 1) #define geode_mfgpt_release_irq(t, c, i) geode_mfgpt_set_irq((t), (c), (i), 0) +#ifdef CONFIG_GEODE_MFGPT_TIMER +extern int __init mfgpt_timer_setup(void); +#else +static inline int mfgpt_timer_setup(void) { return 0; } +#endif + #endif -- cgit v1.2.3-59-g8ed1b From f087515c658a68454d43909d482ea4b59e7d6d5c Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Sat, 9 Feb 2008 23:24:08 +0100 Subject: x86: GEODE: MFGPT: Use "just-in-time" detection for the MFGPT timers There isn't much value to always detecting the MFGPT timers on Geode platforms; detection is only needed when something wants to use the timers. Move the detection code so that it gets called the first time a timer is allocated. Signed-off-by: Jordan Crouse Signed-off-by: Andres Salomon Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/geode_32.c | 5 +---- arch/x86/kernel/mfgpt_32.c | 39 +++++++++++++++++++++++++++------------ include/asm-x86/geode.h | 1 - 3 files changed, 28 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/geode_32.c b/arch/x86/kernel/geode_32.c index 9c7f7d395968..9dad6ca6cd70 100644 --- a/arch/x86/kernel/geode_32.c +++ b/arch/x86/kernel/geode_32.c @@ -163,14 +163,11 @@ EXPORT_SYMBOL_GPL(geode_gpio_setup_event); static int __init geode_southbridge_init(void) { - int timers; - if (!is_geode()) return -ENODEV; init_lbars(); - timers = geode_mfgpt_detect(); - printk(KERN_INFO "geode: %d MFGPT timers available.\n", timers); + (void) mfgpt_timer_setup(); return 0; } diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c index 5cf3a839530c..abdb7c71199a 100644 --- a/arch/x86/kernel/mfgpt_32.c +++ b/arch/x86/kernel/mfgpt_32.c @@ -74,28 +74,37 @@ __setup("mfgptfix", mfgpt_fix); * In other cases (such as with VSAless OpenFirmware), the system firmware * leaves timers available for us to use. */ -int __init geode_mfgpt_detect(void) + + +static int timers = -1; + +static void geode_mfgpt_detect(void) { - int count = 0, i; + int i; u16 val; + timers = 0; + if (disable) { - printk(KERN_INFO "geode-mfgpt: Skipping MFGPT setup\n"); - return 0; + printk(KERN_INFO "geode-mfgpt: MFGPT support is disabled\n"); + goto done; + } + + if (!geode_get_dev_base(GEODE_DEV_MFGPT)) { + printk(KERN_INFO "geode-mfgpt: MFGPT LBAR is not set up\n"); + goto done; } for (i = 0; i < MFGPT_MAX_TIMERS; i++) { val = geode_mfgpt_read(i, MFGPT_REG_SETUP); if (!(val & MFGPT_SETUP_SETUP)) { mfgpt_timers[i].avail = 1; - count++; + timers++; } } - /* set up clock event device, if desired */ - i = mfgpt_timer_setup(); - - return count; +done: + printk(KERN_INFO "geode-mfgpt: %d MFGPT timers available.\n", timers); } int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable) @@ -183,10 +192,16 @@ int geode_mfgpt_alloc_timer(int timer, int domain) { int i; - if (!geode_get_dev_base(GEODE_DEV_MFGPT)) - return -ENODEV; + if (timers == -1) { + /* timers haven't been detected yet */ + geode_mfgpt_detect(); + } + + if (!timers) + return -1; + if (timer >= MFGPT_MAX_TIMERS) - return -EIO; + return -1; if (timer < 0) { /* Try to find an available timer */ diff --git a/include/asm-x86/geode.h b/include/asm-x86/geode.h index c13630655d62..9e7280092a48 100644 --- a/include/asm-x86/geode.h +++ b/include/asm-x86/geode.h @@ -206,7 +206,6 @@ static inline u16 geode_mfgpt_read(int timer, u16 reg) return inw(base + reg + (timer * 8)); } -extern int __init geode_mfgpt_detect(void); extern int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable); extern int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable); extern int geode_mfgpt_alloc_timer(int timer, int domain); -- cgit v1.2.3-59-g8ed1b From da7bfc50f5cb54aeee8147dca0c1de9d487cb5e0 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Sat, 9 Feb 2008 23:24:08 +0100 Subject: x86: sparse warnings in pageattr.c Adjust the definition of lookup_address to take an unsigned long level argument. Adjust callers in xen/mmu.c that pass in a dummy variable. Signed-off-by: Harvey Harrison Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/pageattr.c | 8 +++++--- arch/x86/xen/mmu.c | 6 +++--- include/asm-x86/pgtable.h | 2 +- 3 files changed, 9 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 8493c855582b..eb2a54415a77 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -191,7 +191,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address) * or when the present bit is not set. Otherwise we would return a * pointer to a nonexisting mapping. */ -pte_t *lookup_address(unsigned long address, int *level) +pte_t *lookup_address(unsigned long address, unsigned int *level) { pgd_t *pgd = pgd_offset_k(address); pud_t *pud; @@ -255,7 +255,8 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, unsigned long nextpage_addr, numpages, pmask, psize, flags; pte_t new_pte, old_pte, *tmp; pgprot_t old_prot, new_prot; - int level, do_split = 1; + int do_split = 1; + unsigned int level; spin_lock_irqsave(&pgd_lock, flags); /* @@ -406,7 +407,8 @@ out_unlock: static int __change_page_attr(unsigned long address, struct cpa_data *cpa) { - int level, do_split, err; + int do_split, err; + unsigned int level; struct page *kpte_page; pte_t *kpte; diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 45aa771e73a9..0144395448ae 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -58,7 +58,7 @@ xmaddr_t arbitrary_virt_to_machine(unsigned long address) { - int level; + unsigned int level; pte_t *pte = lookup_address(address, &level); unsigned offset = address & PAGE_MASK; @@ -71,7 +71,7 @@ void make_lowmem_page_readonly(void *vaddr) { pte_t *pte, ptev; unsigned long address = (unsigned long)vaddr; - int level; + unsigned int level; pte = lookup_address(address, &level); BUG_ON(pte == NULL); @@ -86,7 +86,7 @@ void make_lowmem_page_readwrite(void *vaddr) { pte_t *pte, ptev; unsigned long address = (unsigned long)vaddr; - int level; + unsigned int level; pte = lookup_address(address, &level); BUG_ON(pte == NULL); diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h index 44c0a4f1b1eb..174b87738714 100644 --- a/include/asm-x86/pgtable.h +++ b/include/asm-x86/pgtable.h @@ -255,7 +255,7 @@ enum { * NOTE: the return type is pte_t but if the pmd is PSE then we return it * as a pte too. */ -extern pte_t *lookup_address(unsigned long address, int *level); +extern pte_t *lookup_address(unsigned long address, unsigned int *level); /* local pte updates need not use xchg for locking */ static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) -- cgit v1.2.3-59-g8ed1b From 6697c05296fab4d113c7144459b72b6172b485a5 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Sat, 9 Feb 2008 23:24:08 +0100 Subject: x86: fix sparse warnings in acpi/bus.c Add function definition and extern variables to asm-x86/acpi.h. All of these are used in bus.c in ifdef(CONFIG_X86) sections, so are only added to the x86 include headers. boot.c already includes acpi.h so no changes are needed there. Fixes the following: arch/x86/kernel/acpi/boot.c:83:4: warning: symbol 'acpi_sci_flags' was not declared. Should it be static? arch/x86/kernel/acpi/boot.c:84:5: warning: symbol 'acpi_sci_override_gsi' was not declared. Should it be static? arch/x86/kernel/acpi/boot.c:421:13: warning: symbol 'acpi_pic_sci_set_trigger' was not declared. Should it be static? Signed-off-by: Harvey Harrison Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- drivers/acpi/bus.c | 7 +------ include/asm-x86/acpi.h | 4 ++++ 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 8b0d4b7d188a..ce3c0a2cbac4 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -31,6 +31,7 @@ #include #include #include +#include #ifdef CONFIG_X86 #include #endif @@ -39,9 +40,6 @@ #define _COMPONENT ACPI_BUS_COMPONENT ACPI_MODULE_NAME("bus"); -#ifdef CONFIG_X86 -extern void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger); -#endif struct acpi_device *acpi_root; struct proc_dir_entry *acpi_root_dir; @@ -653,8 +651,6 @@ void __init acpi_early_init(void) #ifdef CONFIG_X86 if (!acpi_ioapic) { - extern u8 acpi_sci_flags; - /* compatible (0) means level (3) */ if (!(acpi_sci_flags & ACPI_MADT_TRIGGER_MASK)) { acpi_sci_flags &= ~ACPI_MADT_TRIGGER_MASK; @@ -664,7 +660,6 @@ void __init acpi_early_init(void) acpi_pic_sci_set_trigger(acpi_gbl_FADT.sci_interrupt, (acpi_sci_flags & ACPI_MADT_TRIGGER_MASK) >> 2); } else { - extern int acpi_sci_override_gsi; /* * now that acpi_gbl_FADT is initialized, * update it with result from INT_SRC_OVR parsing diff --git a/include/asm-x86/acpi.h b/include/asm-x86/acpi.h index 98a9ca266531..7a72d6aa50be 100644 --- a/include/asm-x86/acpi.h +++ b/include/asm-x86/acpi.h @@ -89,6 +89,10 @@ extern int acpi_pci_disabled; extern int acpi_skip_timer_override; extern int acpi_use_timer_override; +extern u8 acpi_sci_flags; +extern int acpi_sci_override_gsi; +void acpi_pic_sci_set_trigger(unsigned int, u16); + static inline void disable_acpi(void) { acpi_disabled = 1; -- cgit v1.2.3-59-g8ed1b From 185c045c245f46485ad8bbd8cc1100e986ff3f13 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Sat, 9 Feb 2008 23:24:09 +0100 Subject: x86, core: remove CONFIG_FORCED_INLINING Other than the defconfigs, remove the entry in compiler-gcc4.h, Kconfig.debug and feature-removal-schedule.txt. Signed-off-by: Harvey Harrison Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- Documentation/feature-removal-schedule.txt | 9 --------- arch/x86/configs/i386_defconfig | 1 - arch/x86/configs/x86_64_defconfig | 1 - include/linux/compiler-gcc4.h | 9 --------- lib/Kconfig.debug | 14 -------------- 5 files changed, 34 deletions(-) (limited to 'include') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index ce9503c892b5..557f4a2f1655 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -111,15 +111,6 @@ Who: Christoph Hellwig --------------------------- -What: CONFIG_FORCED_INLINING -When: June 2006 -Why: Config option is there to see if gcc is good enough. (in january - 2006). If it is, the behavior should just be the default. If it's not, - the option should just go away entirely. -Who: Arjan van de Ven - ---------------------------- - What: eepro100 network driver When: January 2007 Why: replaced by the e100 driver diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 77562e7cdab6..3df340b54e57 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -1421,7 +1421,6 @@ CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_VM is not set # CONFIG_DEBUG_LIST is not set # CONFIG_FRAME_POINTER is not set -# CONFIG_FORCED_INLINING is not set # CONFIG_RCU_TORTURE_TEST is not set # CONFIG_LKDTM is not set # CONFIG_FAULT_INJECTION is not set diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 9e2b0ef851de..eef98cb00c62 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -1346,7 +1346,6 @@ CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_VM is not set # CONFIG_DEBUG_LIST is not set # CONFIG_FRAME_POINTER is not set -# CONFIG_FORCED_INLINING is not set # CONFIG_RCU_TORTURE_TEST is not set # CONFIG_LKDTM is not set # CONFIG_FAULT_INJECTION is not set diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index 0ab3a3232330..974f5b7bb205 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -5,15 +5,6 @@ /* These definitions are for GCC v4.x. */ #include -#ifdef CONFIG_FORCED_INLINING -# undef inline -# undef __inline__ -# undef __inline -# define inline inline __attribute__((always_inline)) -# define __inline__ __inline__ __attribute__((always_inline)) -# define __inline __inline __attribute__((always_inline)) -#endif - #define __used __attribute__((__used__)) #define __must_check __attribute__((warn_unused_result)) #define __compiler_offsetof(a,b) __builtin_offsetof(a,b) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index ce0bb2600c25..a370fe828a79 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -465,20 +465,6 @@ config FRAME_POINTER some architectures or if you use external debuggers. If you don't debug the kernel, you can say N. -config FORCED_INLINING - bool "Force gcc to inline functions marked 'inline'" - depends on DEBUG_KERNEL - default y - help - This option determines if the kernel forces gcc to inline the functions - developers have marked 'inline'. Doing so takes away freedom from gcc to - do what it thinks is best, which is desirable for the gcc 3.x series of - compilers. The gcc 4.x series have a rewritten inlining algorithm and - disabling this option will generate a smaller kernel there. Hopefully - this algorithm is so good that allowing gcc4 to make the decision can - become the default in the future, until then this option is there to - test gcc for this. - config BOOT_PRINTK_DELAY bool "Delay each boot printk message by N milliseconds" depends on DEBUG_KERNEL && PRINTK && GENERIC_CALIBRATE_DELAY -- cgit v1.2.3-59-g8ed1b From 551889a6e2a24a9c06fd453ea03b57b7746ffdc0 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Sat, 9 Feb 2008 23:24:09 +0100 Subject: x86: construct 32-bit boot time page tables in native format. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Specifically the boot time page tables in a CONFIG_X86_PAE=y enabled kernel are in PAE format. early_ioremap is updated to use the standard page table accessors. Clear any mappings beyond max_low_pfn from the boot page tables in native_pagetable_setup_start because the initial mappings can extend beyond the range of physical memory and into the vmalloc area. Derived from patches by Eric Biederman and H. Peter Anvin. [ jeremy@goop.org: PAE swapper_pg_dir needs to be page-sized fix ] Signed-off-by: Ian Campbell Cc: H. Peter Anvin Cc: Eric W. Biederman Cc: Andi Kleen Cc: Mika Penttilä Cc: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/head_32.S | 151 +++++++++++++++++++++++++++++++++---------- arch/x86/kernel/setup_32.c | 4 ++ arch/x86/mm/init_32.c | 72 ++++++++------------- arch/x86/mm/ioremap.c | 55 +++++++++------- include/asm-x86/page_32.h | 1 - include/asm-x86/pgtable_32.h | 4 -- 6 files changed, 178 insertions(+), 109 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 5d8c5730686b..74ef4a41f224 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -19,6 +19,10 @@ #include #include #include +#include + +/* Physical address */ +#define pa(X) ((X) - __PAGE_OFFSET) /* * References to members of the new_cpu_data structure. @@ -80,10 +84,6 @@ INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_ */ .section .text.head,"ax",@progbits ENTRY(startup_32) - /* check to see if KEEP_SEGMENTS flag is meaningful */ - cmpw $0x207, BP_version(%esi) - jb 1f - /* test KEEP_SEGMENTS flag to see if the bootloader is asking us to not reload segments */ testb $(1<<6), BP_loadflags(%esi) @@ -92,7 +92,7 @@ ENTRY(startup_32) /* * Set segments to known values. */ -1: lgdt boot_gdt_descr - __PAGE_OFFSET + lgdt pa(boot_gdt_descr) movl $(__BOOT_DS),%eax movl %eax,%ds movl %eax,%es @@ -105,8 +105,8 @@ ENTRY(startup_32) */ cld xorl %eax,%eax - movl $__bss_start - __PAGE_OFFSET,%edi - movl $__bss_stop - __PAGE_OFFSET,%ecx + movl $pa(__bss_start),%edi + movl $pa(__bss_stop),%ecx subl %edi,%ecx shrl $2,%ecx rep ; stosl @@ -118,31 +118,32 @@ ENTRY(startup_32) * (kexec on panic case). Hence copy out the parameters before initializing * page tables. */ - movl $(boot_params - __PAGE_OFFSET),%edi + movl $pa(boot_params),%edi movl $(PARAM_SIZE/4),%ecx cld rep movsl - movl boot_params - __PAGE_OFFSET + NEW_CL_POINTER,%esi + movl pa(boot_params) + NEW_CL_POINTER,%esi andl %esi,%esi jz 1f # No comand line - movl $(boot_command_line - __PAGE_OFFSET),%edi + movl $pa(boot_command_line),%edi movl $(COMMAND_LINE_SIZE/4),%ecx rep movsl 1: #ifdef CONFIG_PARAVIRT - cmpw $0x207, (boot_params + BP_version - __PAGE_OFFSET) + /* This is can only trip for a broken bootloader... */ + cmpw $0x207, pa(boot_params + BP_version) jb default_entry /* Paravirt-compatible boot parameters. Look to see what architecture we're booting under. */ - movl (boot_params + BP_hardware_subarch - __PAGE_OFFSET), %eax + movl pa(boot_params + BP_hardware_subarch), %eax cmpl $num_subarch_entries, %eax jae bad_subarch - movl subarch_entries - __PAGE_OFFSET(,%eax,4), %eax + movl pa(subarch_entries)(,%eax,4), %eax subl $__PAGE_OFFSET, %eax jmp *%eax @@ -170,17 +171,68 @@ num_subarch_entries = (. - subarch_entries) / 4 * Mappings are created both at virtual address 0 (identity mapping) * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END. * - * Warning: don't use %esi or the stack in this code. However, %esp - * can be used as a GPR if you really need it... + * Note that the stack is not yet set up! */ -page_pde_offset = (__PAGE_OFFSET >> 20); +#define PTE_ATTR 0x007 /* PRESENT+RW+USER */ +#define PDE_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */ +#define PGD_ATTR 0x001 /* PRESENT (no other attributes) */ default_entry: - movl $(pg0 - __PAGE_OFFSET), %edi - movl $(swapper_pg_dir - __PAGE_OFFSET), %edx - movl $0x007, %eax /* 0x007 = PRESENT+RW+USER */ +#ifdef CONFIG_X86_PAE + + /* + * In PAE mode swapper_pg_dir is statically defined to contain enough + * entries to cover the VMSPLIT option (that is the top 1, 2 or 3 + * entries). The identity mapping is handled by pointing two PGD + * entries to the first kernel PMD. + * + * Note the upper half of each PMD or PTE are always zero at + * this stage. + */ + +#define KPMDS ((0x100000000-__PAGE_OFFSET) >> 30) /* Number of kernel PMDs */ + + xorl %ebx,%ebx /* %ebx is kept at zero */ + + movl $pa(pg0), %edi + movl $pa(swapper_pg_pmd), %edx + movl $PTE_ATTR, %eax +10: + leal PDE_ATTR(%edi),%ecx /* Create PMD entry */ + movl %ecx,(%edx) /* Store PMD entry */ + /* Upper half already zero */ + addl $8,%edx + movl $512,%ecx +11: + stosl + xchgl %eax,%ebx + stosl + xchgl %eax,%ebx + addl $0x1000,%eax + loop 11b + + /* + * End condition: we must map up to and including INIT_MAP_BEYOND_END + * bytes beyond the end of our own page tables. + */ + leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp + cmpl %ebp,%eax + jb 10b +1: + movl %edi,pa(init_pg_tables_end) + + /* Do early initialization of the fixmap area */ + movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax + movl %eax,pa(swapper_pg_pmd+0x1000*KPMDS-8) +#else /* Not PAE */ + +page_pde_offset = (__PAGE_OFFSET >> 20); + + movl $pa(pg0), %edi + movl $pa(swapper_pg_dir), %edx + movl $PTE_ATTR, %eax 10: - leal 0x007(%edi),%ecx /* Create PDE entry */ + leal PDE_ATTR(%edi),%ecx /* Create PDE entry */ movl %ecx,(%edx) /* Store identity PDE entry */ movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */ addl $4,%edx @@ -189,19 +241,20 @@ default_entry: stosl addl $0x1000,%eax loop 11b - /* End condition: we must map up to and including INIT_MAP_BEYOND_END */ - /* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */ - leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp + /* + * End condition: we must map up to and including INIT_MAP_BEYOND_END + * bytes beyond the end of our own page tables; the +0x007 is + * the attribute bits + */ + leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp cmpl %ebp,%eax jb 10b - movl %edi,(init_pg_tables_end - __PAGE_OFFSET) - - /* Do an early initialization of the fixmap area */ - movl $(swapper_pg_dir - __PAGE_OFFSET), %edx - movl $(swapper_pg_pmd - __PAGE_OFFSET), %eax - addl $0x67, %eax /* 0x67 == _PAGE_TABLE */ - movl %eax, 4092(%edx) + movl %edi,pa(init_pg_tables_end) + /* Do early initialization of the fixmap area */ + movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax + movl %eax,pa(swapper_pg_dir+0xffc) +#endif jmp 3f /* * Non-boot CPU entry point; entered from trampoline.S @@ -241,7 +294,7 @@ ENTRY(startup_32_smp) * NOTE! We have to correct for the fact that we're * not yet offset PAGE_OFFSET.. */ -#define cr4_bits mmu_cr4_features-__PAGE_OFFSET +#define cr4_bits pa(mmu_cr4_features) movl cr4_bits,%edx andl %edx,%edx jz 6f @@ -276,10 +329,10 @@ ENTRY(startup_32_smp) /* * Enable paging */ - movl $swapper_pg_dir-__PAGE_OFFSET,%eax + movl $pa(swapper_pg_dir),%eax movl %eax,%cr3 /* set the page table pointer.. */ movl %cr0,%eax - orl $0x80000000,%eax + orl $X86_CR0_PG,%eax movl %eax,%cr0 /* ..and set paging (PG) bit */ ljmp $__BOOT_CS,$1f /* Clear prefetch and normalize %eip */ 1: @@ -552,16 +605,44 @@ ENTRY(_stext) */ .section ".bss.page_aligned","wa" .align PAGE_SIZE_asm +#ifdef CONFIG_X86_PAE +ENTRY(swapper_pg_pmd) + .fill 1024*KPMDS,4,0 +#else ENTRY(swapper_pg_dir) .fill 1024,4,0 -ENTRY(swapper_pg_pmd) +#endif +ENTRY(swapper_pg_fixmap) .fill 1024,4,0 ENTRY(empty_zero_page) .fill 4096,1,0 - /* * This starts the data section. */ +#ifdef CONFIG_X86_PAE +.section ".data.page_aligned","wa" + /* Page-aligned for the benefit of paravirt? */ + .align PAGE_SIZE_asm +ENTRY(swapper_pg_dir) + .long pa(swapper_pg_pmd+PGD_ATTR),0 /* low identity map */ +# if KPMDS == 3 + .long pa(swapper_pg_pmd+PGD_ATTR),0 + .long pa(swapper_pg_pmd+PGD_ATTR+0x1000),0 + .long pa(swapper_pg_pmd+PGD_ATTR+0x2000),0 +# elif KPMDS == 2 + .long 0,0 + .long pa(swapper_pg_pmd+PGD_ATTR),0 + .long pa(swapper_pg_pmd+PGD_ATTR+0x1000),0 +# elif KPMDS == 1 + .long 0,0 + .long 0,0 + .long pa(swapper_pg_pmd+PGD_ATTR),0 +# else +# error "Kernel PMDs should be 1, 2 or 3" +# endif + .align PAGE_SIZE_asm /* needs to be page-sized too */ +#endif + .data ENTRY(stack_start) .long init_thread_union+THREAD_SIZE diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index d1d8c347cc0b..691ab4cb167b 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -154,7 +154,11 @@ struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; EXPORT_SYMBOL(boot_cpu_data); +#ifndef CONFIG_X86_PAE unsigned long mmu_cr4_features; +#else +unsigned long mmu_cr4_features = X86_CR4_PAE; +#endif /* for MCA, but anyone else can use it if they want */ unsigned int machine_id; diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index d1bc04006d16..54aba3cf9efe 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -46,6 +46,7 @@ #include #include #include +#include unsigned int __VMALLOC_RESERVE = 128 << 20; @@ -328,44 +329,38 @@ pteval_t __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC; void __init native_pagetable_setup_start(pgd_t *base) { -#ifdef CONFIG_X86_PAE - int i; + unsigned long pfn, va; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; /* - * Init entries of the first-level page table to the - * zero page, if they haven't already been set up. - * - * In a normal native boot, we'll be running on a - * pagetable rooted in swapper_pg_dir, but not in PAE - * mode, so this will end up clobbering the mappings - * for the lower 24Mbytes of the address space, - * without affecting the kernel address space. + * Remove any mappings which extend past the end of physical + * memory from the boot time page table: */ - for (i = 0; i < USER_PTRS_PER_PGD; i++) - set_pgd(&base[i], - __pgd(__pa(empty_zero_page) | _PAGE_PRESENT)); - - /* Make sure kernel address space is empty so that a pagetable - will be allocated for it. */ - memset(&base[USER_PTRS_PER_PGD], 0, - KERNEL_PGD_PTRS * sizeof(pgd_t)); -#else + for (pfn = max_low_pfn + 1; pfn < 1<<(32-PAGE_SHIFT); pfn++) { + va = PAGE_OFFSET + (pfn<> PAGE_SHIFT); -#endif } void __init native_pagetable_setup_done(pgd_t *base) { -#ifdef CONFIG_X86_PAE - /* - * Add low memory identity-mappings - SMP needs it when - * starting up on an AP from real-mode. In the non-PAE - * case we already have these mappings through head.S. - * All user-space mappings are explicitly cleared after - * SMP startup. - */ - set_pgd(&base[0], base[USER_PTRS_PER_PGD]); -#endif } /* @@ -374,9 +369,8 @@ void __init native_pagetable_setup_done(pgd_t *base) * the boot process. * * If we're booting on native hardware, this will be a pagetable - * constructed in arch/i386/kernel/head.S, and not running in PAE mode - * (even if we'll end up running in PAE). The root of the pagetable - * will be swapper_pg_dir. + * constructed in arch/x86/kernel/head_32.S. The root of the + * pagetable will be swapper_pg_dir. * * If we're booting paravirtualized under a hypervisor, then there are * more options: we may already be running PAE, and the pagetable may @@ -537,14 +531,6 @@ void __init paging_init(void) load_cr3(swapper_pg_dir); -#ifdef CONFIG_X86_PAE - /* - * We will bail out later - printk doesn't work right now so - * the user would just see a hanging kernel. - */ - if (cpu_has_pae) - set_in_cr4(X86_CR4_PAE); -#endif __flush_tlb_all(); kmap_init(); @@ -675,10 +661,6 @@ void __init mem_init(void) BUG_ON((unsigned long)high_memory > VMALLOC_START); #endif /* double-sanity-check paranoia */ -#ifdef CONFIG_X86_PAE - if (!cpu_has_pae) - panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!"); -#endif if (boot_cpu_data.wp_works_ok < 0) test_wp_bit(); diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index ee6648fe6b15..1106b7f477bd 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -260,41 +260,46 @@ static int __init early_ioremap_debug_setup(char *str) early_param("early_ioremap_debug", early_ioremap_debug_setup); static __initdata int after_paging_init; -static __initdata unsigned long bm_pte[1024] +static __initdata pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __attribute__((aligned(PAGE_SIZE))); -static inline unsigned long * __init early_ioremap_pgd(unsigned long addr) +static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) { - return (unsigned long *)swapper_pg_dir + ((addr >> 22) & 1023); + pgd_t *pgd = &swapper_pg_dir[pgd_index(addr)]; + pud_t *pud = pud_offset(pgd, addr); + pmd_t *pmd = pmd_offset(pud, addr); + + return pmd; } -static inline unsigned long * __init early_ioremap_pte(unsigned long addr) +static inline pte_t * __init early_ioremap_pte(unsigned long addr) { - return bm_pte + ((addr >> PAGE_SHIFT) & 1023); + return &bm_pte[pte_index(addr)]; } void __init early_ioremap_init(void) { - unsigned long *pgd; + pmd_t *pmd; if (early_ioremap_debug) printk(KERN_INFO "early_ioremap_init()\n"); - pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN)); - *pgd = __pa(bm_pte) | _PAGE_TABLE; + pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); memset(bm_pte, 0, sizeof(bm_pte)); + set_pmd(pmd, __pmd(__pa(bm_pte) | _PAGE_TABLE)); + /* - * The boot-ioremap range spans multiple pgds, for which + * The boot-ioremap range spans multiple pmds, for which * we are not prepared: */ - if (pgd != early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END))) { + if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) { WARN_ON(1); - printk(KERN_WARNING "pgd %p != %p\n", - pgd, early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END))); + printk(KERN_WARNING "pmd %p != %p\n", + pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))); printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n", - fix_to_virt(FIX_BTMAP_BEGIN)); + fix_to_virt(FIX_BTMAP_BEGIN)); printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END): %08lx\n", - fix_to_virt(FIX_BTMAP_END)); + fix_to_virt(FIX_BTMAP_END)); printk(KERN_WARNING "FIX_BTMAP_END: %d\n", FIX_BTMAP_END); printk(KERN_WARNING "FIX_BTMAP_BEGIN: %d\n", @@ -304,28 +309,29 @@ void __init early_ioremap_init(void) void __init early_ioremap_clear(void) { - unsigned long *pgd; + pmd_t *pmd; if (early_ioremap_debug) printk(KERN_INFO "early_ioremap_clear()\n"); - pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN)); - *pgd = 0; - paravirt_release_pt(__pa(pgd) >> PAGE_SHIFT); + pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); + pmd_clear(pmd); + paravirt_release_pt(__pa(pmd) >> PAGE_SHIFT); __flush_tlb_all(); } void __init early_ioremap_reset(void) { enum fixed_addresses idx; - unsigned long *pte, phys, addr; + unsigned long addr, phys; + pte_t *pte; after_paging_init = 1; for (idx = FIX_BTMAP_BEGIN; idx >= FIX_BTMAP_END; idx--) { addr = fix_to_virt(idx); pte = early_ioremap_pte(addr); - if (*pte & _PAGE_PRESENT) { - phys = *pte & PAGE_MASK; + if (pte_present(*pte)) { + phys = pte_val(*pte) & PAGE_MASK; set_fixmap(idx, phys); } } @@ -334,7 +340,8 @@ void __init early_ioremap_reset(void) static void __init __early_set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags) { - unsigned long *pte, addr = __fix_to_virt(idx); + unsigned long addr = __fix_to_virt(idx); + pte_t *pte; if (idx >= __end_of_fixed_addresses) { BUG(); @@ -342,9 +349,9 @@ static void __init __early_set_fixmap(enum fixed_addresses idx, } pte = early_ioremap_pte(addr); if (pgprot_val(flags)) - *pte = (phys & PAGE_MASK) | pgprot_val(flags); + set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); else - *pte = 0; + pte_clear(NULL, addr, pte); __flush_tlb_one(addr); } diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h index 984998a30741..5f7257fd589b 100644 --- a/include/asm-x86/page_32.h +++ b/include/asm-x86/page_32.h @@ -48,7 +48,6 @@ typedef unsigned long pgprotval_t; typedef unsigned long phys_addr_t; typedef union { pteval_t pte, pte_low; } pte_t; -typedef pte_t boot_pte_t; #endif /* __ASSEMBLY__ */ #endif /* CONFIG_X86_PAE */ diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h index 80dd438642f6..a842c7222b1e 100644 --- a/include/asm-x86/pgtable_32.h +++ b/include/asm-x86/pgtable_32.h @@ -52,10 +52,6 @@ void paging_init(void); #define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT) #define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS) -#define TWOLEVEL_PGDIR_SHIFT 22 -#define BOOT_USER_PGD_PTRS (__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT) -#define BOOT_KERNEL_PGD_PTRS (1024-BOOT_USER_PGD_PTRS) - /* Just any arbitrary offset to the start of the vmalloc VM area: the * current 8MB value just means that there will be a 8MB "hole" after the * physical memory until the kernel virtual memory starts. That means that -- cgit v1.2.3-59-g8ed1b From 76ebd0548df6ee48586e9b80d8fc2f58aa5fb51c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 9 Feb 2008 23:24:09 +0100 Subject: x86: introduce page pool in cpa DEBUG_PAGEALLOC was not possible on 64-bit due to its early-bootup hardcoded reliance on PSE pages, and the unrobustness of the runtime splitup of large pages. The splitup ended in recursive calls to alloc_pages() when a page for a pte split was requested. Avoid the recursion with a preallocated page pool, which is used to split up large mappings and gets refilled in the return path of kernel_map_pages after the split has been done. The size of the page pool is adjusted to the available memory. This part just implements the page pool and the initialization w/o using it yet. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 2 ++ arch/x86/mm/init_64.c | 2 ++ arch/x86/mm/pageattr.c | 82 +++++++++++++++++++++++++++++++++++++++++++- include/asm-x86/cacheflush.h | 2 ++ 4 files changed, 87 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 54aba3cf9efe..8106bba41ecb 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -664,6 +664,8 @@ void __init mem_init(void) if (boot_cpu_data.wp_works_ok < 0) test_wp_bit(); + cpa_init(); + /* * Subtle. SMP is doing it's boot stuff late (because it has to * fork idle threads) - but it also needs low mappings for the diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 620d2b6b6bf4..b59fc238151f 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -528,6 +528,8 @@ void __init mem_init(void) reservedpages << (PAGE_SHIFT-10), datasize >> 10, initsize >> 10); + + cpa_init(); } void free_init_pages(char *what, unsigned long begin, unsigned long end) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index eb2a54415a77..831462c3bc35 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -336,6 +337,77 @@ out_unlock: return do_split; } +static LIST_HEAD(page_pool); +static unsigned long pool_size, pool_pages, pool_low; +static unsigned long pool_used, pool_failed, pool_refill; + +static void cpa_fill_pool(void) +{ + struct page *p; + gfp_t gfp = GFP_KERNEL; + + /* Do not allocate from interrupt context */ + if (in_irq() || irqs_disabled()) + return; + /* + * Check unlocked. I does not matter when we have one more + * page in the pool. The bit lock avoids recursive pool + * allocations: + */ + if (pool_pages >= pool_size || test_and_set_bit_lock(0, &pool_refill)) + return; + +#ifdef CONFIG_DEBUG_PAGEALLOC + /* + * We could do: + * gfp = in_atomic() ? GFP_ATOMIC : GFP_KERNEL; + * but this fails on !PREEMPT kernels + */ + gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN; +#endif + + while (pool_pages < pool_size) { + p = alloc_pages(gfp, 0); + if (!p) { + pool_failed++; + break; + } + spin_lock_irq(&pgd_lock); + list_add(&p->lru, &page_pool); + pool_pages++; + spin_unlock_irq(&pgd_lock); + } + clear_bit_unlock(0, &pool_refill); +} + +#define SHIFT_MB (20 - PAGE_SHIFT) +#define ROUND_MB_GB ((1 << 10) - 1) +#define SHIFT_MB_GB 10 +#define POOL_PAGES_PER_GB 16 + +void __init cpa_init(void) +{ + struct sysinfo si; + unsigned long gb; + + si_meminfo(&si); + /* + * Calculate the number of pool pages: + * + * Convert totalram (nr of pages) to MiB and round to the next + * GiB. Shift MiB to Gib and multiply the result by + * POOL_PAGES_PER_GB: + */ + gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB; + pool_size = POOL_PAGES_PER_GB * gb; + pool_low = pool_size; + + cpa_fill_pool(); + printk(KERN_DEBUG + "CPA: page pool initialized %lu of %lu pages preallocated\n", + pool_pages, pool_size); +} + static int split_large_page(pte_t *kpte, unsigned long address) { unsigned long flags, pfn, pfninc = 1; @@ -600,7 +672,7 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages, * Check whether we really changed something: */ if (!cpa.flushtlb) - return ret; + goto out; /* * No need to flush, when we did not set any of the caching @@ -619,6 +691,8 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages, else cpa_flush_all(cache); +out: + cpa_fill_pool(); return ret; } @@ -772,6 +846,12 @@ void kernel_map_pages(struct page *page, int numpages, int enable) * but that can deadlock->flush only current cpu: */ __flush_tlb_all(); + + /* + * Try to refill the page pool here. We can do this only after + * the tlb flush. + */ + cpa_fill_pool(); } #endif diff --git a/include/asm-x86/cacheflush.h b/include/asm-x86/cacheflush.h index 8dd8c5e3cc7f..6a22212b4b20 100644 --- a/include/asm-x86/cacheflush.h +++ b/include/asm-x86/cacheflush.h @@ -44,6 +44,8 @@ int set_memory_np(unsigned long addr, int numpages); void clflush_cache_range(void *addr, unsigned int size); +void cpa_init(void); + #ifdef CONFIG_DEBUG_RODATA void mark_rodata_ro(void); #endif -- cgit v1.2.3-59-g8ed1b