aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/arch/x86/kernel/machine_kexec_64.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/machine_kexec_64.c')
-rw-r--r--arch/x86/kernel/machine_kexec_64.c201
1 files changed, 154 insertions, 47 deletions
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index b180d8e497c3..697fb99406e6 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -28,6 +28,7 @@
#include <asm/setup.h>
#include <asm/set_memory.h>
#include <asm/cpu.h>
+#include <asm/efi.h>
#ifdef CONFIG_ACPI
/*
@@ -75,6 +76,19 @@ map_acpi_tables(struct x86_mapping_info *info, pgd_t *level4p)
static int map_acpi_tables(struct x86_mapping_info *info, pgd_t *level4p) { return 0; }
#endif
+static int map_mmio_serial(struct x86_mapping_info *info, pgd_t *level4p)
+{
+ unsigned long mstart, mend;
+
+ if (!kexec_debug_8250_mmio32)
+ return 0;
+
+ mstart = kexec_debug_8250_mmio32 & PAGE_MASK;
+ mend = (kexec_debug_8250_mmio32 + PAGE_SIZE + 23) & PAGE_MASK;
+ pr_info("Map PCI serial at %lx - %lx\n", mstart, mend);
+ return kernel_ident_mapping_init(info, level4p, mstart, mend);
+}
+
#ifdef CONFIG_KEXEC_FILE
const struct kexec_file_ops * const kexec_file_loaders[] = {
&kexec_bzImage64_ops,
@@ -87,6 +101,8 @@ map_efi_systab(struct x86_mapping_info *info, pgd_t *level4p)
{
#ifdef CONFIG_EFI
unsigned long mstart, mend;
+ void *kaddr;
+ int ret;
if (!efi_enabled(EFI_BOOT))
return 0;
@@ -102,6 +118,30 @@ map_efi_systab(struct x86_mapping_info *info, pgd_t *level4p)
if (!mstart)
return 0;
+ ret = kernel_ident_mapping_init(info, level4p, mstart, mend);
+ if (ret)
+ return ret;
+
+ kaddr = memremap(mstart, mend - mstart, MEMREMAP_WB);
+ if (!kaddr) {
+ pr_err("Could not map UEFI system table\n");
+ return -ENOMEM;
+ }
+
+ mstart = efi_config_table;
+
+ if (efi_enabled(EFI_64BIT)) {
+ efi_system_table_64_t *stbl = (efi_system_table_64_t *)kaddr;
+
+ mend = mstart + sizeof(efi_config_table_64_t) * stbl->nr_tables;
+ } else {
+ efi_system_table_32_t *stbl = (efi_system_table_32_t *)kaddr;
+
+ mend = mstart + sizeof(efi_config_table_32_t) * stbl->nr_tables;
+ }
+
+ memunmap(kaddr);
+
return kernel_ident_mapping_init(info, level4p, mstart, mend);
#endif
return 0;
@@ -119,7 +159,8 @@ static void free_transition_pgtable(struct kimage *image)
image->arch.pte = NULL;
}
-static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
+static int init_transition_pgtable(struct kimage *image, pgd_t *pgd,
+ unsigned long control_page)
{
pgprot_t prot = PAGE_KERNEL_EXEC_NOENC;
unsigned long vaddr, paddr;
@@ -129,8 +170,13 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
pmd_t *pmd;
pte_t *pte;
- vaddr = (unsigned long)relocate_kernel;
- paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE);
+ /*
+ * For the transition to the identity mapped page tables, the control
+ * code page also needs to be mapped at the virtual address it starts
+ * off running from.
+ */
+ vaddr = (unsigned long)__va(control_page);
+ paddr = control_page;
pgd += pgd_index(vaddr);
if (!pgd_present(*pgd)) {
p4d = (p4d_t *)get_zeroed_page(GFP_KERNEL);
@@ -189,7 +235,7 @@ static void *alloc_pgt_page(void *data)
return p;
}
-static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
+static int init_pgtable(struct kimage *image, unsigned long control_page)
{
struct x86_mapping_info info = {
.alloc_pgt_page = alloc_pgt_page,
@@ -198,12 +244,12 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
.kernpg_flag = _KERNPG_TABLE_NOENC,
};
unsigned long mstart, mend;
- pgd_t *level4p;
int result;
int i;
- level4p = (pgd_t *)__va(start_pgtable);
- clear_page(level4p);
+ image->arch.pgd = alloc_pgt_page(image);
+ if (!image->arch.pgd)
+ return -ENOMEM;
if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) {
info.page_flag |= _PAGE_ENC;
@@ -217,8 +263,8 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
mstart = pfn_mapped[i].start << PAGE_SHIFT;
mend = pfn_mapped[i].end << PAGE_SHIFT;
- result = kernel_ident_mapping_init(&info,
- level4p, mstart, mend);
+ result = kernel_ident_mapping_init(&info, image->arch.pgd,
+ mstart, mend);
if (result)
return result;
}
@@ -233,8 +279,8 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
mstart = image->segment[i].mem;
mend = mstart + image->segment[i].memsz;
- result = kernel_ident_mapping_init(&info,
- level4p, mstart, mend);
+ result = kernel_ident_mapping_init(&info, image->arch.pgd,
+ mstart, mend);
if (result)
return result;
@@ -244,15 +290,23 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
* Prepare EFI systab and ACPI tables for kexec kernel since they are
* not covered by pfn_mapped.
*/
- result = map_efi_systab(&info, level4p);
+ result = map_efi_systab(&info, image->arch.pgd);
if (result)
return result;
- result = map_acpi_tables(&info, level4p);
+ result = map_acpi_tables(&info, image->arch.pgd);
if (result)
return result;
- return init_transition_pgtable(image, level4p);
+ result = map_mmio_serial(&info, image->arch.pgd);
+ if (result)
+ return result;
+
+ /*
+ * This must be last because the intermediate page table pages it
+ * allocates will not be control pages and may overlap the image.
+ */
+ return init_transition_pgtable(image, image->arch.pgd, control_page);
}
static void load_segments(void)
@@ -267,24 +321,58 @@ static void load_segments(void)
);
}
+static void prepare_debug_idt(unsigned long control_page, unsigned long vec_ofs)
+{
+ gate_desc idtentry = { 0 };
+ int i;
+
+ idtentry.bits.p = 1;
+ idtentry.bits.type = GATE_TRAP;
+ idtentry.segment = __KERNEL_CS;
+ idtentry.offset_low = (control_page & 0xFFFF) + vec_ofs;
+ idtentry.offset_middle = (control_page >> 16) & 0xFFFF;
+ idtentry.offset_high = control_page >> 32;
+
+ for (i = 0; i < 16; i++) {
+ kexec_debug_idt[i] = idtentry;
+ idtentry.offset_low += KEXEC_DEBUG_EXC_HANDLER_SIZE;
+ }
+}
+
int machine_kexec_prepare(struct kimage *image)
{
- unsigned long start_pgtable;
+ void *control_page = page_address(image->control_code_page);
+ unsigned long reloc_start = (unsigned long)__relocate_kernel_start;
+ unsigned long reloc_end = (unsigned long)__relocate_kernel_end;
int result;
- /* Calculate the offsets */
- start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
-
/* Setup the identity mapped 64bit page table */
- result = init_pgtable(image, start_pgtable);
+ result = init_pgtable(image, __pa(control_page));
if (result)
return result;
+ kexec_va_control_page = (unsigned long)control_page;
+ kexec_pa_table_page = (unsigned long)__pa(image->arch.pgd);
+
+ if (image->type == KEXEC_TYPE_DEFAULT)
+ kexec_pa_swap_page = page_to_pfn(image->swap_page) << PAGE_SHIFT;
+
+ prepare_debug_idt((unsigned long)__pa(control_page),
+ (unsigned long)kexec_debug_exc_vectors - reloc_start);
+
+ __memcpy(control_page, __relocate_kernel_start, reloc_end - reloc_start);
+
+ set_memory_rox((unsigned long)control_page, 1);
return 0;
}
void machine_kexec_cleanup(struct kimage *image)
{
+ void *control_page = page_address(image->control_code_page);
+
+ set_memory_nx((unsigned long)control_page, 1);
+ set_memory_rw((unsigned long)control_page, 1);
+
free_transition_pgtable(image);
}
@@ -292,11 +380,19 @@ void machine_kexec_cleanup(struct kimage *image)
* Do not allocate memory (or fail in any way) in machine_kexec().
* We are past the point of no return, committed to rebooting now.
*/
-void machine_kexec(struct kimage *image)
+void __nocfi machine_kexec(struct kimage *image)
{
- unsigned long page_list[PAGES_NR];
- void *control_page;
+ unsigned long reloc_start = (unsigned long)__relocate_kernel_start;
+ relocate_kernel_fn *relocate_kernel_ptr;
+ unsigned int host_mem_enc_active;
int save_ftrace_enabled;
+ void *control_page;
+
+ /*
+ * This must be done before load_segments() since if call depth tracking
+ * is used then GS must be valid to make any function calls.
+ */
+ host_mem_enc_active = cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT);
#ifdef CONFIG_KEXEC_JUMP
if (image->preserve_context)
@@ -323,17 +419,13 @@ void machine_kexec(struct kimage *image)
#endif
}
- control_page = page_address(image->control_code_page) + PAGE_SIZE;
- __memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE);
+ control_page = page_address(image->control_code_page);
- page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page);
- page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
- page_list[PA_TABLE_PAGE] =
- (unsigned long)__pa(page_address(image->control_code_page));
-
- if (image->type == KEXEC_TYPE_DEFAULT)
- page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
- << PAGE_SHIFT);
+ /*
+ * Allow for the possibility that relocate_kernel might not be at
+ * the very start of the page.
+ */
+ relocate_kernel_ptr = control_page + (unsigned long)relocate_kernel - reloc_start;
/*
* The segment registers are funny things, they have both a
@@ -342,23 +434,17 @@ void machine_kexec(struct kimage *image)
* with from a table in memory. At no other time is the
* descriptor table in memory accessed.
*
- * I take advantage of this here by force loading the
- * segments, before I zap the gdt with an invalid value.
+ * Take advantage of this here by force loading the segments,
+ * before the GDT is zapped with an invalid value.
*/
load_segments();
- /*
- * The gdt & idt are now invalid.
- * If you want to load them you must set up your own idt & gdt.
- */
- native_idt_invalidate();
- native_gdt_invalidate();
/* now call it */
- image->start = relocate_kernel((unsigned long)image->head,
- (unsigned long)page_list,
- image->start,
- image->preserve_context,
- cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT));
+ image->start = relocate_kernel_ptr((unsigned long)image->head,
+ virt_to_phys(control_page),
+ image->start,
+ image->preserve_context,
+ host_mem_enc_active);
#ifdef CONFIG_KEXEC_JUMP
if (image->preserve_context)
@@ -539,19 +625,40 @@ static void kexec_mark_crashkres(bool protect)
/* Don't touch the control code page used in crash_kexec().*/
control = PFN_PHYS(page_to_pfn(kexec_crash_image->control_code_page));
- /* Control code page is located in the 2nd page. */
- kexec_mark_range(crashk_res.start, control + PAGE_SIZE - 1, protect);
+ kexec_mark_range(crashk_res.start, control - 1, protect);
control += KEXEC_CONTROL_PAGE_SIZE;
kexec_mark_range(control, crashk_res.end, protect);
}
+/* make the memory storing dm crypt keys in/accessible */
+static void kexec_mark_dm_crypt_keys(bool protect)
+{
+ unsigned long start_paddr, end_paddr;
+ unsigned int nr_pages;
+
+ if (kexec_crash_image->dm_crypt_keys_addr) {
+ start_paddr = kexec_crash_image->dm_crypt_keys_addr;
+ end_paddr = start_paddr + kexec_crash_image->dm_crypt_keys_sz - 1;
+ nr_pages = (PAGE_ALIGN(end_paddr) - PAGE_ALIGN_DOWN(start_paddr))/PAGE_SIZE;
+ if (protect)
+ set_memory_np((unsigned long)phys_to_virt(start_paddr), nr_pages);
+ else
+ __set_memory_prot(
+ (unsigned long)phys_to_virt(start_paddr),
+ nr_pages,
+ __pgprot(_PAGE_PRESENT | _PAGE_NX | _PAGE_RW));
+ }
+}
+
void arch_kexec_protect_crashkres(void)
{
kexec_mark_crashkres(true);
+ kexec_mark_dm_crypt_keys(true);
}
void arch_kexec_unprotect_crashkres(void)
{
+ kexec_mark_dm_crypt_keys(false);
kexec_mark_crashkres(false);
}
#endif