Diffstat (limited to 'arch/x86/kernel/setup.c')
-rw-r--r--  arch/x86/kernel/setup.c | 301
1 file changed, 181 insertions(+), 120 deletions(-)
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 00f6c1472b85..9c857f05cef0 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -108,17 +108,16 @@
#include <asm/topology.h>
#include <asm/apicdef.h>
#include <asm/amd_nb.h>
-#ifdef CONFIG_X86_64
-#include <asm/numa_64.h>
-#endif
#include <asm/mce.h>
#include <asm/alternative.h>
#include <asm/prom.h>
/*
- * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
- * The direct mapping extends to max_pfn_mapped, so that we can directly access
- * apertures, ACPI and other tables without having to play with fixmaps.
+ * max_low_pfn_mapped: highest direct mapped pfn under 4GB
+ * max_pfn_mapped: highest direct mapped pfn over 4GB
+ *
+ * The direct mapping only covers E820_RAM regions, so the ranges and gaps are
+ * represented by pfn_mapped
*/
unsigned long max_low_pfn_mapped;
unsigned long max_pfn_mapped;
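
The "pfn_mapped" ranges referred to in the new comment are the bookkeeping the mm init code keeps for every region handed to the direct mapper; callers such as reserve_initrd() below query it via pfn_range_is_mapped(). A minimal kernel-style sketch of that bookkeeping, with illustrative names and a fixed array size rather than the exact implementation:

	struct pfn_range { unsigned long start, end; };    /* pfns, end exclusive */
	static struct pfn_range pfn_mapped[32];             /* illustrative size   */
	static int nr_pfn_mapped;

	static void add_pfn_range_mapped(unsigned long start_pfn, unsigned long end_pfn)
	{
		pfn_mapped[nr_pfn_mapped].start = start_pfn;
		pfn_mapped[nr_pfn_mapped].end = end_pfn;
		nr_pfn_mapped++;
	}

	static bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn)
	{
		int i;

		/* mapped only if one recorded range covers all of [start_pfn, end_pfn) */
		for (i = 0; i < nr_pfn_mapped; i++)
			if (start_pfn >= pfn_mapped[i].start &&
			    end_pfn <= pfn_mapped[i].end)
				return true;
		return false;
	}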
@@ -276,18 +275,7 @@ void * __init extend_brk(size_t size, size_t align)
return ret;
}
-#ifdef CONFIG_X86_64
-static void __init init_gbpages(void)
-{
- if (direct_gbpages && cpu_has_gbpages)
- printk(KERN_INFO "Using GB pages for direct mapping\n");
- else
- direct_gbpages = 0;
-}
-#else
-static inline void init_gbpages(void)
-{
-}
+#ifdef CONFIG_X86_32
static void __init cleanup_highmap(void)
{
}
@@ -296,8 +284,8 @@ static void __init cleanup_highmap(void)
static void __init reserve_brk(void)
{
if (_brk_end > _brk_start)
- memblock_reserve(__pa(_brk_start),
- __pa(_brk_end) - __pa(_brk_start));
+ memblock_reserve(__pa_symbol(_brk_start),
+ _brk_end - _brk_start);
/* Mark brk area as locked down and no longer taking any
new allocations */
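
The reserve_brk() change above switches to __pa_symbol(): _brk_start and _brk_end are kernel symbols, which live in the kernel-image mapping where translation to a physical address is a constant offset, so __pa_symbol() is cheaper and unambiguous this early in boot. Roughly, for the x86_64 case (a simplified sketch, not the exact macros in asm/page_64.h):

	/* direct map:   virt = phys + PAGE_OFFSET                     */
	/* kernel image: virt = phys - phys_base + __START_KERNEL_map  */
	#define __pa(v)        ((unsigned long)(v) - PAGE_OFFSET)
	#define __pa_symbol(s) ((unsigned long)(s) - __START_KERNEL_map + phys_base)

The size argument also simplifies: _brk_end - _brk_start is the same distance in either address space, so translating both endpoints was redundant.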
@@ -306,27 +294,43 @@ static void __init reserve_brk(void)
#ifdef CONFIG_BLK_DEV_INITRD
+static u64 __init get_ramdisk_image(void)
+{
+ u64 ramdisk_image = boot_params.hdr.ramdisk_image;
+
+ ramdisk_image |= (u64)boot_params.ext_ramdisk_image << 32;
+
+ return ramdisk_image;
+}
+static u64 __init get_ramdisk_size(void)
+{
+ u64 ramdisk_size = boot_params.hdr.ramdisk_size;
+
+ ramdisk_size |= (u64)boot_params.ext_ramdisk_size << 32;
+
+ return ramdisk_size;
+}
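
get_ramdisk_image()/get_ramdisk_size() exist because the 32-bit ramdisk fields in the setup header cannot describe an initrd loaded above 4GB; the boot protocol added ext_ramdisk_image/ext_ramdisk_size in struct boot_params to carry the upper 32 bits. A boot loader placing the initrd high would split the address the same way, e.g. (illustrative):

	u64 addr = initrd_phys_load_address;                  /* may be >= 4GB */
	boot_params.hdr.ramdisk_image = (u32)addr;             /* low 32 bits   */
	boot_params.ext_ramdisk_image = (u32)(addr >> 32);     /* high 32 bits  */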
+
#define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT)
static void __init relocate_initrd(void)
{
/* Assume only end is not page aligned */
- u64 ramdisk_image = boot_params.hdr.ramdisk_image;
- u64 ramdisk_size = boot_params.hdr.ramdisk_size;
+ u64 ramdisk_image = get_ramdisk_image();
+ u64 ramdisk_size = get_ramdisk_size();
u64 area_size = PAGE_ALIGN(ramdisk_size);
- u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;
u64 ramdisk_here;
unsigned long slop, clen, mapaddr;
char *p, *q;
- /* We need to move the initrd down into lowmem */
- ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size,
- PAGE_SIZE);
+ /* We need to move the initrd down into directly mapped mem */
+ ramdisk_here = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
+ area_size, PAGE_SIZE);
if (!ramdisk_here)
panic("Cannot find place for new RAMDISK of size %lld\n",
ramdisk_size);
- /* Note: this includes all the lowmem currently occupied by
+ /* Note: this includes all the mem currently occupied by
the initrd, we rely on that fact to keep the data intact. */
memblock_reserve(ramdisk_here, area_size);
initrd_start = ramdisk_here + PAGE_OFFSET;
@@ -336,17 +340,7 @@ static void __init relocate_initrd(void)
q = (char *)initrd_start;
- /* Copy any lowmem portion of the initrd */
- if (ramdisk_image < end_of_lowmem) {
- clen = end_of_lowmem - ramdisk_image;
- p = (char *)__va(ramdisk_image);
- memcpy(q, p, clen);
- q += clen;
- ramdisk_image += clen;
- ramdisk_size -= clen;
- }
-
- /* Copy the highmem portion of the initrd */
+ /* Copy the initrd */
while (ramdisk_size) {
slop = ramdisk_image & ~PAGE_MASK;
clen = ramdisk_size;
@@ -360,22 +354,35 @@ static void __init relocate_initrd(void)
ramdisk_image += clen;
ramdisk_size -= clen;
}
- /* high pages is not converted by early_res_to_bootmem */
- ramdisk_image = boot_params.hdr.ramdisk_image;
- ramdisk_size = boot_params.hdr.ramdisk_size;
+
+ ramdisk_image = get_ramdisk_image();
+ ramdisk_size = get_ramdisk_size();
printk(KERN_INFO "Move RAMDISK from [mem %#010llx-%#010llx] to"
" [mem %#010llx-%#010llx]\n",
ramdisk_image, ramdisk_image + ramdisk_size - 1,
ramdisk_here, ramdisk_here + ramdisk_size - 1);
}
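
The unified copy loop kept above (its body falls outside this hunk's context) works in MAX_MAP_CHUNK pieces: each chunk of the old initrd is temporarily mapped through the early fixmap, copied to the new, directly mapped destination, then unmapped. A sketch of that pattern, assuming the early_memremap()/early_iounmap() helpers of this era:

	while (ramdisk_size) {
		slop = ramdisk_image & ~PAGE_MASK;        /* offset within the page */
		clen = ramdisk_size;
		if (clen > MAX_MAP_CHUNK - slop)
			clen = MAX_MAP_CHUNK - slop;      /* bounded by the fixmap  */
		mapaddr = ramdisk_image & PAGE_MASK;
		p = early_memremap(mapaddr, clen + slop); /* temporary mapping      */
		memcpy(q, p + slop, clen);
		early_iounmap(p, clen + slop);            /* drop the mapping       */
		q += clen;
		ramdisk_image += clen;
		ramdisk_size -= clen;
	}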
+static void __init early_reserve_initrd(void)
+{
+ /* Assume only end is not page aligned */
+ u64 ramdisk_image = get_ramdisk_image();
+ u64 ramdisk_size = get_ramdisk_size();
+ u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
+
+ if (!boot_params.hdr.type_of_loader ||
+ !ramdisk_image || !ramdisk_size)
+ return; /* No initrd provided by bootloader */
+
+ memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
+}
static void __init reserve_initrd(void)
{
/* Assume only end is not page aligned */
- u64 ramdisk_image = boot_params.hdr.ramdisk_image;
- u64 ramdisk_size = boot_params.hdr.ramdisk_size;
+ u64 ramdisk_image = get_ramdisk_image();
+ u64 ramdisk_size = get_ramdisk_size();
u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
- u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;
+ u64 mapped_size;
if (!boot_params.hdr.type_of_loader ||
!ramdisk_image || !ramdisk_size)
@@ -383,22 +390,18 @@ static void __init reserve_initrd(void)
initrd_start = 0;
- if (ramdisk_size >= (end_of_lowmem>>1)) {
+ mapped_size = memblock_mem_size(max_pfn_mapped);
+ if (ramdisk_size >= (mapped_size>>1))
panic("initrd too large to handle, "
"disabling initrd (%lld needed, %lld available)\n",
- ramdisk_size, end_of_lowmem>>1);
- }
+ ramdisk_size, mapped_size>>1);
printk(KERN_INFO "RAMDISK: [mem %#010llx-%#010llx]\n", ramdisk_image,
ramdisk_end - 1);
-
- if (ramdisk_end <= end_of_lowmem) {
- /* All in lowmem, easy case */
- /*
- * don't need to reserve again, already reserved early
- * in i386_start_kernel
- */
+ if (pfn_range_is_mapped(PFN_DOWN(ramdisk_image),
+ PFN_DOWN(ramdisk_end))) {
+ /* All are mapped, easy case */
initrd_start = ramdisk_image + PAGE_OFFSET;
initrd_end = initrd_start + ramdisk_size;
return;
@@ -409,6 +412,9 @@ static void __init reserve_initrd(void)
memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
}
#else
+static void __init early_reserve_initrd(void)
+{
+}
static void __init reserve_initrd(void)
{
}
@@ -419,8 +425,6 @@ static void __init parse_setup_data(void)
struct setup_data *data;
u64 pa_data;
- if (boot_params.hdr.version < 0x0209)
- return;
pa_data = boot_params.hdr.setup_data;
while (pa_data) {
u32 data_len, map_len;
@@ -456,8 +460,6 @@ static void __init e820_reserve_setup_data(void)
u64 pa_data;
int found = 0;
- if (boot_params.hdr.version < 0x0209)
- return;
pa_data = boot_params.hdr.setup_data;
while (pa_data) {
data = early_memremap(pa_data, sizeof(*data));
@@ -481,8 +483,6 @@ static void __init memblock_x86_reserve_range_setup_data(void)
struct setup_data *data;
u64 pa_data;
- if (boot_params.hdr.version < 0x0209)
- return;
pa_data = boot_params.hdr.setup_data;
while (pa_data) {
data = early_memremap(pa_data, sizeof(*data));
@@ -501,17 +501,51 @@ static void __init memblock_x86_reserve_range_setup_data(void)
/*
* Keep the crash kernel below this limit. On 32 bits earlier kernels
* would limit the kernel to the low 512 MiB due to mapping restrictions.
- * On 64 bits, kexec-tools currently limits us to 896 MiB; increase this
- * limit once kexec-tools are fixed.
*/
#ifdef CONFIG_X86_32
# define CRASH_KERNEL_ADDR_MAX (512 << 20)
#else
-# define CRASH_KERNEL_ADDR_MAX (896 << 20)
+# define CRASH_KERNEL_ADDR_MAX MAXMEM
+#endif
+
+static void __init reserve_crashkernel_low(void)
+{
+#ifdef CONFIG_X86_64
+ const unsigned long long alignment = 16<<20; /* 16M */
+ unsigned long long low_base = 0, low_size = 0;
+ unsigned long total_low_mem;
+ unsigned long long base;
+ int ret;
+
+ total_low_mem = memblock_mem_size(1UL<<(32-PAGE_SHIFT));
+ ret = parse_crashkernel_low(boot_command_line, total_low_mem,
+ &low_size, &base);
+ if (ret != 0 || low_size <= 0)
+ return;
+
+ low_base = memblock_find_in_range(low_size, (1ULL<<32),
+ low_size, alignment);
+
+ if (!low_base) {
+ pr_info("crashkernel low reservation failed - No suitable area found.\n");
+
+ return;
+ }
+
+ memblock_reserve(low_base, low_size);
+ pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (System low RAM: %ldMB)\n",
+ (unsigned long)(low_size >> 20),
+ (unsigned long)(low_base >> 20),
+ (unsigned long)(total_low_mem >> 20));
+ crashk_low_res.start = low_base;
+ crashk_low_res.end = low_base + low_size - 1;
+ insert_resource(&iomem_resource, &crashk_low_res);
#endif
+}
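
reserve_crashkernel_low() matters when the main crash kernel reservation lands above 4GB (see the new call at the end of reserve_crashkernel()): DMA-limited devices and swiotlb in the kdump kernel still need a slice of memory below 4GB. The size comes from a separate command-line token parsed by parse_crashkernel_low(); for example (amounts illustrative, and assuming the companion ",high" syntax from the same patch series is what pushes the main reservation above 4GB):

	crashkernel=512M,high crashkernel=72M,low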
static void __init reserve_crashkernel(void)
{
+ const unsigned long long alignment = 16<<20; /* 16M */
unsigned long long total_mem;
unsigned long long crash_size, crash_base;
int ret;
@@ -525,8 +559,6 @@ static void __init reserve_crashkernel(void)
/* 0 means: find the address automatically */
if (crash_base <= 0) {
- const unsigned long long alignment = 16<<20; /* 16M */
-
/*
* kexec want bzImage is below CRASH_KERNEL_ADDR_MAX
*/
@@ -537,6 +569,7 @@ static void __init reserve_crashkernel(void)
pr_info("crashkernel reservation failed - No suitable area found.\n");
return;
}
+
} else {
unsigned long long start;
@@ -558,6 +591,9 @@ static void __init reserve_crashkernel(void)
crashk_res.start = crash_base;
crashk_res.end = crash_base + crash_size - 1;
insert_resource(&iomem_resource, &crashk_res);
+
+ if (crash_base >= (1ULL<<32))
+ reserve_crashkernel_low();
}
#else
static void __init reserve_crashkernel(void)
@@ -608,8 +644,6 @@ static __init void reserve_ibft_region(void)
memblock_reserve(addr, size);
}
-static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10;
-
static bool __init snb_gfx_workaround_needed(void)
{
#ifdef CONFIG_PCI
@@ -698,8 +732,7 @@ static void __init trim_bios_range(void)
* since some BIOSes are known to corrupt low memory. See the
* Kconfig help text for X86_RESERVE_LOW.
*/
- e820_update_range(0, ALIGN(reserve_low, PAGE_SIZE),
- E820_RAM, E820_RESERVED);
+ e820_update_range(0, PAGE_SIZE, E820_RAM, E820_RESERVED);
/*
* special case: Some BIOSen report the PC BIOS
@@ -711,6 +744,29 @@ static void __init trim_bios_range(void)
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
}
+/* called before trim_bios_range() to spare extra sanitize */
+static void __init e820_add_kernel_range(void)
+{
+ u64 start = __pa_symbol(_text);
+ u64 size = __pa_symbol(_end) - start;
+
+ /*
+ * Complain if .text .data and .bss are not marked as E820_RAM and
+ * attempt to fix it by adding the range. We may have a confused BIOS,
+ * or the user may have used memmap=exactmap or memmap=xxM$yyM to
+ * exclude kernel range. If we really are running on top non-RAM,
+ * we will crash later anyways.
+ */
+ if (e820_all_mapped(start, start + size, E820_RAM))
+ return;
+
+ pr_warn(".text .data .bss are not marked as E820_RAM!\n");
+ e820_remove_range(start, size, E820_RAM, 0);
+ e820_add_region(start, size, E820_RAM);
+}
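
e820_add_kernel_range() guards against an E820 map that no longer covers the running kernel image, e.g. a confused BIOS or a user-supplied map. With something like (values illustrative)

	memmap=exactmap memmap=512M@16M

only the listed region is treated as RAM; if the kernel text/data/bss fall outside it, the range is re-added as E820_RAM here instead of letting later allocations crash. Doing this before trim_bios_range() means the map only has to be sanitized once.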
+
+static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10;
+
static int __init parse_reservelow(char *p)
{
unsigned long long size;
@@ -733,6 +789,11 @@ static int __init parse_reservelow(char *p)
early_param("reservelow", parse_reservelow);
+static void __init trim_low_memory_range(void)
+{
+ memblock_reserve(0, ALIGN(reserve_low, PAGE_SIZE));
+}
+
/*
* Determine if we were loaded by an EFI loader. If so, then we have also been
* passed the efi memmap, systab, etc., so we should use these data structures
@@ -748,6 +809,17 @@ early_param("reservelow", parse_reservelow);
void __init setup_arch(char **cmdline_p)
{
+ memblock_reserve(__pa_symbol(_text),
+ (unsigned long)__bss_stop - (unsigned long)_text);
+
+ early_reserve_initrd();
+
+ /*
+ * At this point everything still needed from the boot loader
+ * or BIOS or kernel text should be early reserved or marked not
+ * RAM in e820. All other memory is free game.
+ */
+
#ifdef CONFIG_X86_32
memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
visws_early_detect();
@@ -807,15 +879,15 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_EFI
if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
"EL32", 4)) {
- efi_enabled = 1;
- efi_64bit = false;
+ set_bit(EFI_BOOT, &x86_efi_facility);
} else if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
"EL64", 4)) {
- efi_enabled = 1;
- efi_64bit = true;
+ set_bit(EFI_BOOT, &x86_efi_facility);
+ set_bit(EFI_64BIT, &x86_efi_facility);
}
- if (efi_enabled && efi_memblock_x86_reserve_range())
- efi_enabled = 0;
+
+ if (efi_enabled(EFI_BOOT))
+ efi_memblock_x86_reserve_range();
#endif
x86_init.oem.arch_setup();
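
The EFI hunks above replace the efi_enabled/efi_64bit globals with a facility bitmask, so each capability (booted from EFI, 64-bit firmware, usable memmap, runtime services) is tracked and tested independently through efi_enabled(facility). The pattern, in rough outline (bit names as in include/linux/efi.h of this era; sketch only):

	unsigned long x86_efi_facility;             /* bitmask of EFI_* facilities */

	/* recorded during early setup */
	set_bit(EFI_BOOT, &x86_efi_facility);
	set_bit(EFI_64BIT, &x86_efi_facility);

	/* tested later wherever EFI behaviour is conditional */
	int efi_enabled(int facility)
	{
		return test_bit(facility, &x86_efi_facility) != 0;
	}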
@@ -835,12 +907,12 @@ void __init setup_arch(char **cmdline_p)
init_mm.end_data = (unsigned long) _edata;
init_mm.brk = _brk_end;
- code_resource.start = virt_to_phys(_text);
- code_resource.end = virt_to_phys(_etext)-1;
- data_resource.start = virt_to_phys(_etext);
- data_resource.end = virt_to_phys(_edata)-1;
- bss_resource.start = virt_to_phys(&__bss_start);
- bss_resource.end = virt_to_phys(&__bss_stop)-1;
+ code_resource.start = __pa_symbol(_text);
+ code_resource.end = __pa_symbol(_etext)-1;
+ data_resource.start = __pa_symbol(_etext);
+ data_resource.end = __pa_symbol(_edata)-1;
+ bss_resource.start = __pa_symbol(__bss_start);
+ bss_resource.end = __pa_symbol(__bss_stop)-1;
#ifdef CONFIG_CMDLINE_BOOL
#ifdef CONFIG_CMDLINE_OVERRIDE
@@ -888,7 +960,7 @@ void __init setup_arch(char **cmdline_p)
finish_e820_parsing();
- if (efi_enabled)
+ if (efi_enabled(EFI_BOOT))
efi_init();
dmi_scan_machine();
@@ -906,6 +978,7 @@ void __init setup_arch(char **cmdline_p)
insert_resource(&iomem_resource, &data_resource);
insert_resource(&iomem_resource, &bss_resource);
+ e820_add_kernel_range();
trim_bios_range();
#ifdef CONFIG_X86_32
if (ppro_with_ram_bug()) {
@@ -955,6 +1028,8 @@ void __init setup_arch(char **cmdline_p)
reserve_ibft_region();
+ early_alloc_pgt_buf();
+
/*
* Need to conclude brk, before memblock_x86_fill()
* it could use memblock_find_in_range, could overlap with
@@ -964,14 +1039,14 @@ void __init setup_arch(char **cmdline_p)
cleanup_highmap();
- memblock.current_limit = get_max_mapped();
+ memblock.current_limit = ISA_END_ADDRESS;
memblock_x86_fill();
/*
* The EFI specification says that boot service code won't be called
* after ExitBootServices(). This is, in fact, a lie.
*/
- if (efi_enabled)
+ if (efi_enabled(EFI_MEMMAP))
efi_reserve_boot_services();
/* preallocate 4k for mptable mpc */
@@ -981,41 +1056,31 @@ void __init setup_arch(char **cmdline_p)
setup_bios_corruption_check();
#endif
+ /*
+ * In the memory hotplug case, the kernel needs info from SRAT to
+ * determine which memory is hotpluggable before allocating memory
+ * using memblock.
+ */
+ acpi_boot_table_init();
+ early_acpi_boot_init();
+ early_parse_srat();
+
+#ifdef CONFIG_X86_32
printk(KERN_DEBUG "initial memory mapped: [mem 0x00000000-%#010lx]\n",
(max_pfn_mapped<<PAGE_SHIFT) - 1);
+#endif
- setup_real_mode();
+ reserve_real_mode();
trim_platform_memory_ranges();
+ trim_low_memory_range();
- init_gbpages();
+ init_mem_mapping();
- /* max_pfn_mapped is updated here */
- max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
- max_pfn_mapped = max_low_pfn_mapped;
+ early_trap_pf_init();
-#ifdef CONFIG_X86_64
- if (max_pfn > max_low_pfn) {
- int i;
- unsigned long start, end;
- unsigned long start_pfn, end_pfn;
-
- for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn,
- NULL) {
-
- end = PFN_PHYS(end_pfn);
- if (end <= (1UL<<32))
- continue;
-
- start = PFN_PHYS(start_pfn);
- max_pfn_mapped = init_memory_mapping(
- max((1UL<<32), start), end);
- }
+ setup_real_mode();
- /* can we preseve max_low_pfn ?*/
- max_low_pfn = max_pfn;
- }
-#endif
memblock.current_limit = get_max_mapped();
dma_contiguous_reserve(0);
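
init_mem_mapping() (arch/x86/mm/init.c) now owns what the removed code above did by hand: it probes which large-page sizes are usable (subsuming the old init_gbpages()), maps the low ISA range first, then maps the rest of E820 RAM top-down in growing steps so the page tables for each step come from memory mapped by the previous one, and finally updates max_pfn_mapped/max_low_pfn_mapped itself; that is also why memblock.current_limit can stay at ISA_END_ADDRESS until this point. A simplified sketch of the 64-bit case, not the exact function (32-bit stops at max_low_pfn):

	void __init init_mem_mapping(void)
	{
		unsigned long step = 64UL << 20;        /* illustrative step size */
		unsigned long last = max_pfn << PAGE_SHIFT;
		unsigned long start;

		probe_page_size_mask();                 /* 2M/1G page usability   */

		/* low range first, so early allocations keep working */
		init_memory_mapping(0, ISA_END_ADDRESS);

		/* then top-down: page tables for each step are allocated from
		 * the memory mapped in the previous (higher) step */
		while (last > ISA_END_ADDRESS) {
			start = (last > ISA_END_ADDRESS + step) ?
				last - step : ISA_END_ADDRESS;
			init_memory_mapping(start, last);
			last = start;
			step <<= 5;                     /* widen each round       */
		}

		load_cr3(swapper_pg_dir);
		__flush_tlb_all();
	}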
@@ -1045,10 +1110,6 @@ void __init setup_arch(char **cmdline_p)
/*
* Parse the ACPI tables for possible boot-time SMP configuration.
*/
- acpi_boot_table_init();
-
- early_acpi_boot_init();
-
initmem_init();
memblock_find_dma_reserve();
@@ -1114,7 +1175,7 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_VT
#if defined(CONFIG_VGA_CONSOLE)
- if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
+ if (!efi_enabled(EFI_BOOT) || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
conswitchp = &vga_con;
#elif defined(CONFIG_DUMMY_CONSOLE)
conswitchp = &dummy_con;
@@ -1131,14 +1192,14 @@ void __init setup_arch(char **cmdline_p)
register_refined_jiffies(CLOCK_TICK_RATE);
#ifdef CONFIG_EFI
- /* Once setup is done above, disable efi_enabled on mismatched
- * firmware/kernel archtectures since there is no support for
- * runtime services.
+ /* Once setup is done above, unmap the EFI memory map on
+ * mismatched firmware/kernel archtectures since there is no
+ * support for runtime services.
*/
- if (efi_enabled && IS_ENABLED(CONFIG_X86_64) != efi_64bit) {
+ if (efi_enabled(EFI_BOOT) &&
+ IS_ENABLED(CONFIG_X86_64) != efi_enabled(EFI_64BIT)) {
pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n");
efi_unmap_memmap();
- efi_enabled = 0;
}
#endif
}