aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/arch/x86/kernel/crash.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/crash.c')
-rw-r--r--arch/x86/kernel/crash.c282
1 files changed, 213 insertions, 69 deletions
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 9730c88530fc..c6b12bed173d 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -26,6 +26,7 @@
#include <linux/vmalloc.h>
#include <linux/memblock.h>
+#include <asm/bootparam.h>
#include <asm/processor.h>
#include <asm/hardirq.h>
#include <asm/nmi.h>
@@ -37,10 +38,10 @@
#include <linux/kdebug.h>
#include <asm/cpu.h>
#include <asm/reboot.h>
-#include <asm/virtext.h>
#include <asm/intel_pt.h>
#include <asm/crash.h>
#include <asm/cmdline.h>
+#include <asm/sev.h>
/* Used while preparing memory map entries for second kernel */
struct crash_memmap_data {
@@ -49,27 +50,6 @@ struct crash_memmap_data {
unsigned int type;
};
-/*
- * This is used to VMCLEAR all VMCSs loaded on the
- * processor. And when loading kvm_intel module, the
- * callback function pointer will be assigned.
- *
- * protected by rcu.
- */
-crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL;
-EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss);
-
-static inline void cpu_crash_vmclear_loaded_vmcss(void)
-{
- crash_vmclear_fn *do_vmclear_operation = NULL;
-
- rcu_read_lock();
- do_vmclear_operation = rcu_dereference(crash_vmclear_loaded_vmcss);
- if (do_vmclear_operation)
- do_vmclear_operation();
- rcu_read_unlock();
-}
-
#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
@@ -77,24 +57,12 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
crash_save_cpu(regs, cpu);
/*
- * VMCLEAR VMCSs loaded on all cpus if needed.
- */
- cpu_crash_vmclear_loaded_vmcss();
-
- /* Disable VMX or SVM if needed.
- *
- * We need to disable virtualization on all CPUs.
- * Having VMX or SVM enabled on any CPU may break rebooting
- * after the kdump kernel has finished its task.
- */
- cpu_emergency_vmxoff();
- cpu_emergency_svm_disable();
-
- /*
* Disable Intel PT to stop its logging
*/
cpu_emergency_stop_pt();
+ kdump_sev_callback();
+
disable_local_APIC();
}
@@ -143,17 +111,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
crash_smp_send_stop();
- /*
- * VMCLEAR VMCSs loaded on this cpu if needed.
- */
- cpu_crash_vmclear_loaded_vmcss();
-
- /* Booting kdump kernel with VMX or SVM enabled won't work,
- * because (among other limitations) we can't disable paging
- * with the virt flags.
- */
- cpu_emergency_vmxoff();
- cpu_emergency_svm_disable();
+ cpu_emergency_disable_virtualization();
/*
* Disable Intel PT to stop its logging
@@ -170,11 +128,22 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
#ifdef CONFIG_HPET_TIMER
hpet_disable();
#endif
- crash_save_cpu(regs, safe_smp_processor_id());
-}
-#ifdef CONFIG_KEXEC_FILE
+ /*
+ * Non-crash kexec calls enc_kexec_begin() while scheduling is still
+ * active. This allows the callback to wait until all in-flight
+ * shared<->private conversions are complete. In a crash scenario,
+ * enc_kexec_begin() gets called after all but one CPU have been shut
+ * down and interrupts have been disabled. This allows the callback to
+ * detect a race with the conversion and report it.
+ */
+ x86_platform.guest.enc_kexec_begin();
+ x86_platform.guest.enc_kexec_finish();
+
+ crash_save_cpu(regs, smp_processor_id());
+}
+#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_HOTPLUG)
static int get_nr_ram_ranges_callback(struct resource *res, void *arg)
{
unsigned int *nr_ranges = arg;
@@ -194,10 +163,10 @@ static struct crash_mem *fill_up_crash_elf_data(void)
return NULL;
/*
- * Exclusion of crash region and/or crashk_low_res may cause
- * another range split. So add extra two slots here.
+ * Exclusion of crash region, crashk_low_res and/or crashk_cma_ranges
+ * may cause range splits. So add extra slots here.
*/
- nr_ranges += 2;
+ nr_ranges += 2 + crashk_cma_cnt;
cmem = vzalloc(struct_size(cmem, ranges, nr_ranges));
if (!cmem)
return NULL;
@@ -215,9 +184,10 @@ static struct crash_mem *fill_up_crash_elf_data(void)
static int elf_header_exclude_ranges(struct crash_mem *cmem)
{
int ret = 0;
+ int i;
/* Exclude the low 1M because it is always reserved */
- ret = crash_exclude_mem_range(cmem, 0, (1<<20)-1);
+ ret = crash_exclude_mem_range(cmem, 0, SZ_1M - 1);
if (ret)
return ret;
@@ -229,8 +199,17 @@ static int elf_header_exclude_ranges(struct crash_mem *cmem)
if (crashk_low_res.end)
ret = crash_exclude_mem_range(cmem, crashk_low_res.start,
crashk_low_res.end);
+ if (ret)
+ return ret;
- return ret;
+ for (i = 0; i < crashk_cma_cnt; ++i) {
+ ret = crash_exclude_mem_range(cmem, crashk_cma_ranges[i].start,
+ crashk_cma_ranges[i].end);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
}
static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg)
@@ -245,8 +224,8 @@ static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg)
}
/* Prepare elf headers. Return addr and size */
-static int prepare_elf_headers(struct kimage *image, void **addr,
- unsigned long *sz)
+static int prepare_elf_headers(void **addr, unsigned long *sz,
+ unsigned long *nr_mem_ranges)
{
struct crash_mem *cmem;
int ret;
@@ -264,14 +243,19 @@ static int prepare_elf_headers(struct kimage *image, void **addr,
if (ret)
goto out;
+ /* Return the computed number of memory ranges, for hotplug usage */
+ *nr_mem_ranges = cmem->nr_ranges;
+
/* By default prepare 64bit headers */
- ret = crash_prepare_elf64_headers(cmem, IS_ENABLED(CONFIG_X86_64), addr, sz);
+ ret = crash_prepare_elf64_headers(cmem, IS_ENABLED(CONFIG_X86_64), addr, sz);
out:
vfree(cmem);
return ret;
}
+#endif
+#ifdef CONFIG_KEXEC_FILE
static int add_e820_entry(struct boot_params *params, struct e820_entry *entry)
{
unsigned int nr_e820_entries;
@@ -304,6 +288,7 @@ static int memmap_exclude_ranges(struct kimage *image, struct crash_mem *cmem,
unsigned long long mend)
{
unsigned long start, end;
+ int ret;
cmem->ranges[0].start = mstart;
cmem->ranges[0].end = mend;
@@ -312,22 +297,43 @@ static int memmap_exclude_ranges(struct kimage *image, struct crash_mem *cmem,
/* Exclude elf header region */
start = image->elf_load_addr;
end = start + image->elf_headers_sz - 1;
- return crash_exclude_mem_range(cmem, start, end);
+ ret = crash_exclude_mem_range(cmem, start, end);
+
+ if (ret)
+ return ret;
+
+ /* Exclude dm crypt keys region */
+ if (image->dm_crypt_keys_addr) {
+ start = image->dm_crypt_keys_addr;
+ end = start + image->dm_crypt_keys_sz - 1;
+ return crash_exclude_mem_range(cmem, start, end);
+ }
+
+ return ret;
}
/* Prepare memory map for crash dump kernel */
int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params)
{
+ unsigned int nr_ranges = 0;
int i, ret = 0;
unsigned long flags;
struct e820_entry ei;
struct crash_memmap_data cmd;
struct crash_mem *cmem;
- cmem = vzalloc(struct_size(cmem, ranges, 1));
+ /*
+ * Using random kexec_buf for passing dm crypt keys may cause a range
+ * split. So use two slots here.
+ */
+ nr_ranges = 2;
+ cmem = vzalloc(struct_size(cmem, ranges, nr_ranges));
if (!cmem)
return -ENOMEM;
+ cmem->max_nr_ranges = nr_ranges;
+ cmem->nr_ranges = 0;
+
memset(&cmd, 0, sizeof(struct crash_memmap_data));
cmd.params = params;
@@ -378,6 +384,14 @@ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params)
add_e820_entry(params, &ei);
}
+ for (i = 0; i < crashk_cma_cnt; ++i) {
+ ei.addr = crashk_cma_ranges[i].start;
+ ei.size = crashk_cma_ranges[i].end -
+ crashk_cma_ranges[i].start + 1;
+ ei.type = E820_TYPE_RAM;
+ add_e820_entry(params, &ei);
+ }
+
out:
vfree(cmem);
return ret;
@@ -386,29 +400,159 @@ out:
int crash_load_segments(struct kimage *image)
{
int ret;
+ unsigned long pnum = 0;
struct kexec_buf kbuf = { .image = image, .buf_min = 0,
.buf_max = ULONG_MAX, .top_down = false };
/* Prepare elf headers and add a segment */
- ret = prepare_elf_headers(image, &kbuf.buffer, &kbuf.bufsz);
+ ret = prepare_elf_headers(&kbuf.buffer, &kbuf.bufsz, &pnum);
if (ret)
return ret;
- image->elf_headers = kbuf.buffer;
- image->elf_headers_sz = kbuf.bufsz;
+ image->elf_headers = kbuf.buffer;
+ image->elf_headers_sz = kbuf.bufsz;
+ kbuf.memsz = kbuf.bufsz;
+
+#ifdef CONFIG_CRASH_HOTPLUG
+ /*
+ * The elfcorehdr segment size accounts for VMCOREINFO, kernel_map,
+ * maximum CPUs and maximum memory ranges.
+ */
+ if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
+ pnum = 2 + CONFIG_NR_CPUS_DEFAULT + CONFIG_CRASH_MAX_MEMORY_RANGES;
+ else
+ pnum += 2 + CONFIG_NR_CPUS_DEFAULT;
+
+ if (pnum < (unsigned long)PN_XNUM) {
+ kbuf.memsz = pnum * sizeof(Elf64_Phdr);
+ kbuf.memsz += sizeof(Elf64_Ehdr);
+
+ image->elfcorehdr_index = image->nr_segments;
+
+ /* Mark as usable to crash kernel, else crash kernel fails on boot */
+ image->elf_headers_sz = kbuf.memsz;
+ } else {
+ pr_err("number of Phdrs %lu exceeds max\n", pnum);
+ }
+#endif
- kbuf.memsz = kbuf.bufsz;
kbuf.buf_align = ELF_CORE_HEADER_ALIGN;
kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
ret = kexec_add_buffer(&kbuf);
- if (ret) {
- vfree((void *)image->elf_headers);
+ if (ret)
return ret;
- }
image->elf_load_addr = kbuf.mem;
- pr_debug("Loaded ELF headers at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
- image->elf_load_addr, kbuf.bufsz, kbuf.memsz);
+ kexec_dprintk("Loaded ELF headers at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ image->elf_load_addr, kbuf.bufsz, kbuf.memsz);
return ret;
}
#endif /* CONFIG_KEXEC_FILE */
+
+#ifdef CONFIG_CRASH_HOTPLUG
+
+#undef pr_fmt
+#define pr_fmt(fmt) "crash hp: " fmt
+
+int arch_crash_hotplug_support(struct kimage *image, unsigned long kexec_flags)
+{
+
+#ifdef CONFIG_KEXEC_FILE
+ if (image->file_mode)
+ return 1;
+#endif
+ /*
+ * Initially, crash hotplug support for kexec_load was added
+ * with the KEXEC_UPDATE_ELFCOREHDR flag. Later, this
+ * functionality was expanded to accommodate multiple kexec
+ * segment updates, leading to the introduction of the
+ * KEXEC_CRASH_HOTPLUG_SUPPORT kexec flag bit. Consequently,
+ * when the kexec tool sends either of these flags, it indicates
+ * that the required kexec segment (elfcorehdr) is excluded from
+ * the SHA calculation.
+ */
+ return (kexec_flags & KEXEC_UPDATE_ELFCOREHDR ||
+ kexec_flags & KEXEC_CRASH_HOTPLUG_SUPPORT);
+}
+
+unsigned int arch_crash_get_elfcorehdr_size(void)
+{
+ unsigned int sz;
+
+ /* kernel_map, VMCOREINFO and maximum CPUs */
+ sz = 2 + CONFIG_NR_CPUS_DEFAULT;
+ if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
+ sz += CONFIG_CRASH_MAX_MEMORY_RANGES;
+ sz *= sizeof(Elf64_Phdr);
+ return sz;
+}
+
+/**
+ * arch_crash_handle_hotplug_event() - Handle hotplug elfcorehdr changes
+ * @image: a pointer to kexec_crash_image
+ * @arg: struct memory_notify handler for memory hotplug case and
+ * NULL for CPU hotplug case.
+ *
+ * Prepare the new elfcorehdr and replace the existing elfcorehdr.
+ */
+void arch_crash_handle_hotplug_event(struct kimage *image, void *arg)
+{
+ void *elfbuf = NULL, *old_elfcorehdr;
+ unsigned long nr_mem_ranges;
+ unsigned long mem, memsz;
+ unsigned long elfsz = 0;
+
+ /*
+ * As crash_prepare_elf64_headers() has already described all
+ * possible CPUs, there is no need to update the elfcorehdr
+ * for additional CPU changes.
+ */
+ if ((image->file_mode || image->elfcorehdr_updated) &&
+ ((image->hp_action == KEXEC_CRASH_HP_ADD_CPU) ||
+ (image->hp_action == KEXEC_CRASH_HP_REMOVE_CPU)))
+ return;
+
+ /*
+ * Create the new elfcorehdr reflecting the changes to CPU and/or
+ * memory resources.
+ */
+ if (prepare_elf_headers(&elfbuf, &elfsz, &nr_mem_ranges)) {
+ pr_err("unable to create new elfcorehdr");
+ goto out;
+ }
+
+ /*
+ * Obtain address and size of the elfcorehdr segment, and
+ * check it against the new elfcorehdr buffer.
+ */
+ mem = image->segment[image->elfcorehdr_index].mem;
+ memsz = image->segment[image->elfcorehdr_index].memsz;
+ if (elfsz > memsz) {
+ pr_err("update elfcorehdr elfsz %lu > memsz %lu",
+ elfsz, memsz);
+ goto out;
+ }
+
+ /*
+ * Copy new elfcorehdr over the old elfcorehdr at destination.
+ */
+ old_elfcorehdr = kmap_local_page(pfn_to_page(mem >> PAGE_SHIFT));
+ if (!old_elfcorehdr) {
+ pr_err("mapping elfcorehdr segment failed\n");
+ goto out;
+ }
+
+ /*
+ * Temporarily invalidate the crash image while the
+ * elfcorehdr is updated.
+ */
+ xchg(&kexec_crash_image, NULL);
+ memcpy_flushcache(old_elfcorehdr, elfbuf, elfsz);
+ xchg(&kexec_crash_image, image);
+ kunmap_local(old_elfcorehdr);
+ pr_debug("updated elfcorehdr\n");
+
+out:
+ vfree(elfbuf);
+}
+#endif