diff options
Diffstat (limited to 'arch/x86/hyperv')
-rw-r--r-- | arch/x86/hyperv/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/hyperv/hv_apic.c | 33 | ||||
-rw-r--r-- | arch/x86/hyperv/hv_init.c | 179 | ||||
-rw-r--r-- | arch/x86/hyperv/hv_proc.c | 213 | ||||
-rw-r--r-- | arch/x86/hyperv/hv_spinlock.c | 7 | ||||
-rw-r--r-- | arch/x86/hyperv/hv_vtl.c | 102 | ||||
-rw-r--r-- | arch/x86/hyperv/irqdomain.c | 8 | ||||
-rw-r--r-- | arch/x86/hyperv/ivm.c | 52 | ||||
-rw-r--r-- | arch/x86/hyperv/mmu.c | 6 | ||||
-rw-r--r-- | arch/x86/hyperv/nested.c | 2 |
10 files changed, 188 insertions, 416 deletions
diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile index 3a1548054b48..d55f494f471d 100644 --- a/arch/x86/hyperv/Makefile +++ b/arch/x86/hyperv/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only obj-y := hv_init.o mmu.o nested.o irqdomain.o ivm.o -obj-$(CONFIG_X86_64) += hv_apic.o hv_proc.o +obj-$(CONFIG_X86_64) += hv_apic.o obj-$(CONFIG_HYPERV_VTL_MODE) += hv_vtl.o ifdef CONFIG_X86_64 diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index 5fc45543e955..bfde0a3498b9 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -23,12 +23,12 @@ #include <linux/vmalloc.h> #include <linux/mm.h> #include <linux/clockchips.h> -#include <linux/hyperv.h> #include <linux/slab.h> #include <linux/cpuhotplug.h> #include <asm/hypervisor.h> #include <asm/mshyperv.h> #include <asm/apic.h> +#include <asm/msr.h> #include <asm/trace/hyperv.h> @@ -38,7 +38,7 @@ static u64 hv_apic_icr_read(void) { u64 reg_val; - rdmsrl(HV_X64_MSR_ICR, reg_val); + rdmsrq(HV_X64_MSR_ICR, reg_val); return reg_val; } @@ -50,7 +50,7 @@ static void hv_apic_icr_write(u32 low, u32 id) reg_val = reg_val << 32; reg_val |= low; - wrmsrl(HV_X64_MSR_ICR, reg_val); + wrmsrq(HV_X64_MSR_ICR, reg_val); } static u32 hv_apic_read(u32 reg) @@ -76,10 +76,10 @@ static void hv_apic_write(u32 reg, u32 val) { switch (reg) { case APIC_EOI: - wrmsr(HV_X64_MSR_EOI, val, 0); + wrmsrq(HV_X64_MSR_EOI, val); break; case APIC_TASKPRI: - wrmsr(HV_X64_MSR_TPR, val, 0); + wrmsrq(HV_X64_MSR_TPR, val); break; default: native_apic_mem_write(reg, val); @@ -93,7 +93,7 @@ static void hv_apic_eoi_write(void) if (hvp && (xchg(&hvp->apic_assist, 0) & 0x1)) return; - wrmsr(HV_X64_MSR_EOI, APIC_EOI_ACK, 0); + wrmsrq(HV_X64_MSR_EOI, APIC_EOI_ACK); } static bool cpu_is_self(int cpu) @@ -105,7 +105,7 @@ static bool cpu_is_self(int cpu) * IPI implementation on Hyper-V. */ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector, - bool exclude_self) + bool exclude_self) { struct hv_send_ipi_ex *ipi_arg; unsigned long flags; @@ -132,8 +132,8 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector, if (!cpumask_equal(mask, cpu_present_mask) || exclude_self) { ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K; - nr_bank = cpumask_to_vpset_skip(&(ipi_arg->vp_set), mask, - exclude_self ? cpu_is_self : NULL); + nr_bank = cpumask_to_vpset_skip(&ipi_arg->vp_set, mask, + exclude_self ? cpu_is_self : NULL); /* * 'nr_bank <= 0' means some CPUs in cpumask can't be @@ -146,8 +146,13 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector, ipi_arg->vp_set.format = HV_GENERIC_SET_ALL; } + /* + * For this hypercall, Hyper-V treats the valid_bank_mask field + * of ipi_arg->vp_set as part of the fixed size input header. + * So the variable input header size is equal to nr_bank. + */ status = hv_do_rep_hypercall(HVCALL_SEND_IPI_EX, 0, nr_bank, - ipi_arg, NULL); + ipi_arg, NULL); ipi_mask_ex_done: local_irq_restore(flags); @@ -155,7 +160,7 @@ ipi_mask_ex_done: } static bool __send_ipi_mask(const struct cpumask *mask, int vector, - bool exclude_self) + bool exclude_self) { int cur_cpu, vcpu, this_cpu = smp_processor_id(); struct hv_send_ipi ipi_arg; @@ -181,7 +186,7 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector, return false; } - if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) + if (vector < HV_IPI_LOW_VECTOR || vector > HV_IPI_HIGH_VECTOR) return false; /* @@ -218,7 +223,7 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector, } status = hv_do_fast_hypercall16(HVCALL_SEND_IPI, ipi_arg.vector, - ipi_arg.cpu_mask); + ipi_arg.cpu_mask); return hv_result_success(status); do_ex_hypercall: @@ -241,7 +246,7 @@ static bool __send_ipi_one(int cpu, int vector) return false; } - if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) + if (vector < HV_IPI_LOW_VECTOR || vector > HV_IPI_HIGH_VECTOR) return false; if (vp >= 64) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 17a71e92a343..3d1d3547095a 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -19,15 +19,15 @@ #include <asm/sev.h> #include <asm/ibt.h> #include <asm/hypervisor.h> -#include <asm/hyperv-tlfs.h> +#include <hyperv/hvhdk.h> #include <asm/mshyperv.h> +#include <asm/msr.h> #include <asm/idtentry.h> #include <asm/set_memory.h> #include <linux/kexec.h> #include <linux/version.h> #include <linux/vmalloc.h> #include <linux/mm.h> -#include <linux/hyperv.h> #include <linux/slab.h> #include <linux/kernel.h> #include <linux/cpuhotplug.h> @@ -35,10 +35,6 @@ #include <clocksource/hyperv_timer.h> #include <linux/highmem.h> -int hyperv_init_cpuhp; -u64 hv_current_partition_id = ~0ull; -EXPORT_SYMBOL_GPL(hv_current_partition_id); - void *hv_hypercall_pg; EXPORT_SYMBOL_GPL(hv_hypercall_pg); @@ -67,7 +63,7 @@ static int hyperv_init_ghcb(void) * returned by MSR_AMD64_SEV_ES_GHCB is above shared * memory boundary and map it here. */ - rdmsrl(MSR_AMD64_SEV_ES_GHCB, ghcb_gpa); + rdmsrq(MSR_AMD64_SEV_ES_GHCB, ghcb_gpa); /* Mask out vTOM bit. ioremap_cache() maps decrypted */ ghcb_gpa &= ~ms_hyperv.shared_gpa_boundary; @@ -95,12 +91,12 @@ static int hv_cpu_init(unsigned int cpu) return 0; hvp = &hv_vp_assist_page[cpu]; - if (hv_root_partition) { + if (hv_root_partition()) { /* * For root partition we get the hypervisor provided VP assist * page, instead of allocating a new page. */ - rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); + rdmsrq(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); *hvp = memremap(msr.pfn << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT, PAGE_SIZE, MEMREMAP_WB); } else { @@ -133,7 +129,7 @@ static int hv_cpu_init(unsigned int cpu) } if (!WARN_ON(!(*hvp))) { msr.enable = 1; - wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); + wrmsrq(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); } return hyperv_init_ghcb(); @@ -145,7 +141,7 @@ static void hv_reenlightenment_notify(struct work_struct *dummy) { struct hv_tsc_emulation_status emu_status; - rdmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status); + rdmsrq(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status); /* Don't issue the callback if TSC accesses are not emulated */ if (hv_reenlightenment_cb && emu_status.inprogress) @@ -158,11 +154,11 @@ void hyperv_stop_tsc_emulation(void) u64 freq; struct hv_tsc_emulation_status emu_status; - rdmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status); + rdmsrq(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status); emu_status.inprogress = 0; - wrmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status); + wrmsrq(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status); - rdmsrl(HV_X64_MSR_TSC_FREQUENCY, freq); + rdmsrq(HV_X64_MSR_TSC_FREQUENCY, freq); tsc_khz = div64_u64(freq, 1000); } EXPORT_SYMBOL_GPL(hyperv_stop_tsc_emulation); @@ -208,8 +204,8 @@ void set_hv_tscchange_cb(void (*cb)(void)) re_ctrl.target_vp = hv_vp_index[get_cpu()]; - wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl)); - wrmsrl(HV_X64_MSR_TSC_EMULATION_CONTROL, *((u64 *)&emu_ctrl)); + wrmsrq(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl)); + wrmsrq(HV_X64_MSR_TSC_EMULATION_CONTROL, *((u64 *)&emu_ctrl)); put_cpu(); } @@ -222,9 +218,9 @@ void clear_hv_tscchange_cb(void) if (!hv_reenlightenment_available()) return; - rdmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *(u64 *)&re_ctrl); + rdmsrq(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *(u64 *)&re_ctrl); re_ctrl.enabled = 0; - wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *(u64 *)&re_ctrl); + wrmsrq(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *(u64 *)&re_ctrl); hv_reenlightenment_cb = NULL; } @@ -247,7 +243,7 @@ static int hv_cpu_die(unsigned int cpu) if (hv_vp_assist_page && hv_vp_assist_page[cpu]) { union hv_vp_assist_msr_contents msr = { 0 }; - if (hv_root_partition) { + if (hv_root_partition()) { /* * For root partition the VP assist page is mapped to * hypervisor provided page, and thus we unmap the @@ -256,16 +252,16 @@ static int hv_cpu_die(unsigned int cpu) */ memunmap(hv_vp_assist_page[cpu]); hv_vp_assist_page[cpu] = NULL; - rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); + rdmsrq(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); msr.enable = 0; } - wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); + wrmsrq(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); } if (hv_reenlightenment_cb == NULL) return 0; - rdmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl)); + rdmsrq(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl)); if (re_ctrl.target_vp == hv_vp_index[cpu]) { /* * Reassign reenlightenment notifications to some other online @@ -279,7 +275,7 @@ static int hv_cpu_die(unsigned int cpu) else re_ctrl.enabled = 0; - wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl)); + wrmsrq(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl)); } return 0; @@ -322,7 +318,7 @@ static int hv_suspend(void) union hv_x64_msr_hypercall_contents hypercall_msr; int ret; - if (hv_root_partition) + if (hv_root_partition()) return -EPERM; /* @@ -336,9 +332,9 @@ static int hv_suspend(void) hv_hypercall_pg = NULL; /* Disable the hypercall page in the hypervisor */ - rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + rdmsrq(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); hypercall_msr.enable = 0; - wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + wrmsrq(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); ret = hv_cpu_die(0); return ret; @@ -353,11 +349,11 @@ static void hv_resume(void) WARN_ON(ret); /* Re-enable the hypercall page */ - rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + rdmsrq(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); hypercall_msr.enable = 1; hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg_saved); - wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + wrmsrq(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); hv_hypercall_pg = hv_hypercall_pg_saved; hv_hypercall_pg_saved = NULL; @@ -395,58 +391,6 @@ static void __init hv_stimer_setup_percpu_clockev(void) old_setup_percpu_clockev(); } -static void __init hv_get_partition_id(void) -{ - struct hv_get_partition_id *output_page; - u64 status; - unsigned long flags; - - local_irq_save(flags); - output_page = *this_cpu_ptr(hyperv_pcpu_output_arg); - status = hv_do_hypercall(HVCALL_GET_PARTITION_ID, NULL, output_page); - if (!hv_result_success(status)) { - /* No point in proceeding if this failed */ - pr_err("Failed to get partition ID: %lld\n", status); - BUG(); - } - hv_current_partition_id = output_page->partition_id; - local_irq_restore(flags); -} - -#if IS_ENABLED(CONFIG_HYPERV_VTL_MODE) -static u8 __init get_vtl(void) -{ - u64 control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_REGISTERS; - struct hv_get_vp_registers_input *input; - struct hv_get_vp_registers_output *output; - unsigned long flags; - u64 ret; - - local_irq_save(flags); - input = *this_cpu_ptr(hyperv_pcpu_input_arg); - output = (struct hv_get_vp_registers_output *)input; - - memset(input, 0, struct_size(input, element, 1)); - input->header.partitionid = HV_PARTITION_ID_SELF; - input->header.vpindex = HV_VP_INDEX_SELF; - input->header.inputvtl = 0; - input->element[0].name0 = HV_X64_REGISTER_VSM_VP_STATUS; - - ret = hv_do_hypercall(control, input, output); - if (hv_result_success(ret)) { - ret = output->as64.low & HV_X64_VTL_MASK; - } else { - pr_err("Failed to get VTL(error: %lld) exiting...\n", ret); - BUG(); - } - - local_irq_restore(flags); - return ret; -} -#else -static inline u8 get_vtl(void) { return 0; } -#endif - /* * This function is to be invoked early in the boot sequence after the * hypervisor has been detected. @@ -474,7 +418,7 @@ void __init hyperv_init(void) if (hv_isolation_type_tdx()) hv_vp_assist_page = NULL; else - hv_vp_assist_page = kcalloc(num_possible_cpus(), + hv_vp_assist_page = kcalloc(nr_cpu_ids, sizeof(*hv_vp_assist_page), GFP_KERNEL); if (!hv_vp_assist_page) { @@ -522,7 +466,7 @@ void __init hyperv_init(void) * in such a VM and is only used in such a VM. */ guest_id = hv_generate_guest_id(LINUX_VERSION_CODE); - wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id); + wrmsrq(HV_X64_MSR_GUEST_OS_ID, guest_id); /* With the paravisor, the VM must also write the ID via GHCB/GHCI */ hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id); @@ -538,10 +482,10 @@ void __init hyperv_init(void) if (hv_hypercall_pg == NULL) goto clean_guest_os_id; - rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + rdmsrq(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); hypercall_msr.enable = 1; - if (hv_root_partition) { + if (hv_root_partition()) { struct page *pg; void *src; @@ -555,7 +499,7 @@ void __init hyperv_init(void) * so it is populated with code, then copy the code to an * executable page. */ - wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + wrmsrq(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); pg = vmalloc_to_page(hv_hypercall_pg); src = memremap(hypercall_msr.guest_physical_address << PAGE_SHIFT, PAGE_SIZE, @@ -567,7 +511,7 @@ void __init hyperv_init(void) hv_remap_tsc_clocksource(); } else { hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg); - wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + wrmsrq(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); } skip_hypercall_pg_init: @@ -607,19 +551,15 @@ skip_hypercall_pg_init: register_syscore_ops(&hv_syscore_ops); - hyperv_init_cpuhp = cpuhp; - - if (cpuid_ebx(HYPERV_CPUID_FEATURES) & HV_ACCESS_PARTITION_ID) + if (ms_hyperv.priv_high & HV_ACCESS_PARTITION_ID) hv_get_partition_id(); - BUG_ON(hv_root_partition && hv_current_partition_id == ~0ull); - #ifdef CONFIG_PCI_MSI /* * If we're running as root, we want to create our own PCI MSI domain. * We can't set this in hv_pci_init because that would be too late. */ - if (hv_root_partition) + if (hv_root_partition()) x86_init.irqs.create_pci_msi_domain = hv_create_pci_msi_domain; #endif @@ -635,9 +575,9 @@ skip_hypercall_pg_init: return; clean_guest_os_id: - wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); + wrmsrq(HV_X64_MSR_GUEST_OS_ID, 0); hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, 0); - cpuhp_remove_state(cpuhp); + cpuhp_remove_state(CPUHP_AP_HYPERV_ONLINE); free_ghcb_page: free_percpu(hv_ghcb_pg); free_vp_assist_page: @@ -656,7 +596,7 @@ void hyperv_cleanup(void) union hv_reference_tsc_msr tsc_msr; /* Reset our OS id */ - wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); + wrmsrq(HV_X64_MSR_GUEST_OS_ID, 0); hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, 0); /* @@ -694,18 +634,18 @@ void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die) return; panic_reported = true; - rdmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id); + rdmsrq(HV_X64_MSR_GUEST_OS_ID, guest_id); - wrmsrl(HV_X64_MSR_CRASH_P0, err); - wrmsrl(HV_X64_MSR_CRASH_P1, guest_id); - wrmsrl(HV_X64_MSR_CRASH_P2, regs->ip); - wrmsrl(HV_X64_MSR_CRASH_P3, regs->ax); - wrmsrl(HV_X64_MSR_CRASH_P4, regs->sp); + wrmsrq(HV_X64_MSR_CRASH_P0, err); + wrmsrq(HV_X64_MSR_CRASH_P1, guest_id); + wrmsrq(HV_X64_MSR_CRASH_P2, regs->ip); + wrmsrq(HV_X64_MSR_CRASH_P3, regs->ax); + wrmsrq(HV_X64_MSR_CRASH_P4, regs->sp); /* * Let Hyper-V know there is crash data available */ - wrmsrl(HV_X64_MSR_CRASH_CTL, HV_CRASH_CTL_CRASH_NOTIFY); + wrmsrq(HV_X64_MSR_CRASH_CTL, HV_CRASH_CTL_CRASH_NOTIFY); } EXPORT_SYMBOL_GPL(hyperv_report_panic); @@ -728,8 +668,41 @@ bool hv_is_hyperv_initialized(void) * that the hypercall page is setup */ hypercall_msr.as_uint64 = 0; - rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + rdmsrq(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); return hypercall_msr.enable; } EXPORT_SYMBOL_GPL(hv_is_hyperv_initialized); + +int hv_apicid_to_vp_index(u32 apic_id) +{ + u64 control; + u64 status; + unsigned long irq_flags; + struct hv_get_vp_from_apic_id_in *input; + u32 *output, ret; + + local_irq_save(irq_flags); + + input = *this_cpu_ptr(hyperv_pcpu_input_arg); + memset(input, 0, sizeof(*input)); + input->partition_id = HV_PARTITION_ID_SELF; + input->apic_ids[0] = apic_id; + + output = *this_cpu_ptr(hyperv_pcpu_output_arg); + + control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_INDEX_FROM_APIC_ID; + status = hv_do_hypercall(control, input, output); + ret = output[0]; + + local_irq_restore(irq_flags); + + if (!hv_result_success(status)) { + pr_err("failed to get vp index from apic id %d, status %#llx\n", + apic_id, status); + return -EINVAL; + } + + return ret; +} +EXPORT_SYMBOL_GPL(hv_apicid_to_vp_index); diff --git a/arch/x86/hyperv/hv_proc.c b/arch/x86/hyperv/hv_proc.c deleted file mode 100644 index 68a0843d4750..000000000000 --- a/arch/x86/hyperv/hv_proc.c +++ /dev/null @@ -1,213 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <linux/types.h> -#include <linux/vmalloc.h> -#include <linux/mm.h> -#include <linux/clockchips.h> -#include <linux/acpi.h> -#include <linux/hyperv.h> -#include <linux/slab.h> -#include <linux/cpuhotplug.h> -#include <linux/minmax.h> -#include <asm/hypervisor.h> -#include <asm/mshyperv.h> -#include <asm/apic.h> - -#include <asm/trace/hyperv.h> - -/* - * See struct hv_deposit_memory. The first u64 is partition ID, the rest - * are GPAs. - */ -#define HV_DEPOSIT_MAX (HV_HYP_PAGE_SIZE / sizeof(u64) - 1) - -/* Deposits exact number of pages. Must be called with interrupts enabled. */ -int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages) -{ - struct page **pages, *page; - int *counts; - int num_allocations; - int i, j, page_count; - int order; - u64 status; - int ret; - u64 base_pfn; - struct hv_deposit_memory *input_page; - unsigned long flags; - - if (num_pages > HV_DEPOSIT_MAX) - return -E2BIG; - if (!num_pages) - return 0; - - /* One buffer for page pointers and counts */ - page = alloc_page(GFP_KERNEL); - if (!page) - return -ENOMEM; - pages = page_address(page); - - counts = kcalloc(HV_DEPOSIT_MAX, sizeof(int), GFP_KERNEL); - if (!counts) { - free_page((unsigned long)pages); - return -ENOMEM; - } - - /* Allocate all the pages before disabling interrupts */ - i = 0; - - while (num_pages) { - /* Find highest order we can actually allocate */ - order = 31 - __builtin_clz(num_pages); - - while (1) { - pages[i] = alloc_pages_node(node, GFP_KERNEL, order); - if (pages[i]) - break; - if (!order) { - ret = -ENOMEM; - num_allocations = i; - goto err_free_allocations; - } - --order; - } - - split_page(pages[i], order); - counts[i] = 1 << order; - num_pages -= counts[i]; - i++; - } - num_allocations = i; - - local_irq_save(flags); - - input_page = *this_cpu_ptr(hyperv_pcpu_input_arg); - - input_page->partition_id = partition_id; - - /* Populate gpa_page_list - these will fit on the input page */ - for (i = 0, page_count = 0; i < num_allocations; ++i) { - base_pfn = page_to_pfn(pages[i]); - for (j = 0; j < counts[i]; ++j, ++page_count) - input_page->gpa_page_list[page_count] = base_pfn + j; - } - status = hv_do_rep_hypercall(HVCALL_DEPOSIT_MEMORY, - page_count, 0, input_page, NULL); - local_irq_restore(flags); - if (!hv_result_success(status)) { - pr_err("Failed to deposit pages: %lld\n", status); - ret = hv_result(status); - goto err_free_allocations; - } - - ret = 0; - goto free_buf; - -err_free_allocations: - for (i = 0; i < num_allocations; ++i) { - base_pfn = page_to_pfn(pages[i]); - for (j = 0; j < counts[i]; ++j) - __free_page(pfn_to_page(base_pfn + j)); - } - -free_buf: - free_page((unsigned long)pages); - kfree(counts); - return ret; -} - -int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id) -{ - struct hv_add_logical_processor_in *input; - struct hv_add_logical_processor_out *output; - u64 status; - unsigned long flags; - int ret = HV_STATUS_SUCCESS; - int pxm = node_to_pxm(node); - - /* - * When adding a logical processor, the hypervisor may return - * HV_STATUS_INSUFFICIENT_MEMORY. When that happens, we deposit more - * pages and retry. - */ - do { - local_irq_save(flags); - - input = *this_cpu_ptr(hyperv_pcpu_input_arg); - /* We don't do anything with the output right now */ - output = *this_cpu_ptr(hyperv_pcpu_output_arg); - - input->lp_index = lp_index; - input->apic_id = apic_id; - input->flags = 0; - input->proximity_domain_info.domain_id = pxm; - input->proximity_domain_info.flags.reserved = 0; - input->proximity_domain_info.flags.proximity_info_valid = 1; - input->proximity_domain_info.flags.proximity_preferred = 1; - status = hv_do_hypercall(HVCALL_ADD_LOGICAL_PROCESSOR, - input, output); - local_irq_restore(flags); - - if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { - if (!hv_result_success(status)) { - pr_err("%s: cpu %u apic ID %u, %lld\n", __func__, - lp_index, apic_id, status); - ret = hv_result(status); - } - break; - } - ret = hv_call_deposit_pages(node, hv_current_partition_id, 1); - } while (!ret); - - return ret; -} - -int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags) -{ - struct hv_create_vp *input; - u64 status; - unsigned long irq_flags; - int ret = HV_STATUS_SUCCESS; - int pxm = node_to_pxm(node); - - /* Root VPs don't seem to need pages deposited */ - if (partition_id != hv_current_partition_id) { - /* The value 90 is empirically determined. It may change. */ - ret = hv_call_deposit_pages(node, partition_id, 90); - if (ret) - return ret; - } - - do { - local_irq_save(irq_flags); - - input = *this_cpu_ptr(hyperv_pcpu_input_arg); - - input->partition_id = partition_id; - input->vp_index = vp_index; - input->flags = flags; - input->subnode_type = HvSubnodeAny; - if (node != NUMA_NO_NODE) { - input->proximity_domain_info.domain_id = pxm; - input->proximity_domain_info.flags.reserved = 0; - input->proximity_domain_info.flags.proximity_info_valid = 1; - input->proximity_domain_info.flags.proximity_preferred = 1; - } else { - input->proximity_domain_info.as_uint64 = 0; - } - status = hv_do_hypercall(HVCALL_CREATE_VP, input, NULL); - local_irq_restore(irq_flags); - - if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { - if (!hv_result_success(status)) { - pr_err("%s: vcpu %u, lp %u, %lld\n", __func__, - vp_index, flags, status); - ret = hv_result(status); - } - break; - } - ret = hv_call_deposit_pages(node, partition_id, 1); - - } while (!ret); - - return ret; -} - diff --git a/arch/x86/hyperv/hv_spinlock.c b/arch/x86/hyperv/hv_spinlock.c index 151e851bef09..81b006601370 100644 --- a/arch/x86/hyperv/hv_spinlock.c +++ b/arch/x86/hyperv/hv_spinlock.c @@ -15,6 +15,7 @@ #include <asm/mshyperv.h> #include <asm/paravirt.h> #include <asm/apic.h> +#include <asm/msr.h> static bool hv_pvspin __initdata = true; @@ -39,18 +40,18 @@ static void hv_qlock_wait(u8 *byte, u8 val) * To prevent a race against the unlock path it is required to * disable interrupts before accessing the HV_X64_MSR_GUEST_IDLE * MSR. Otherwise, if the IPI from hv_qlock_kick() arrives between - * the lock value check and the rdmsrl() then the vCPU might be put + * the lock value check and the rdmsrq() then the vCPU might be put * into 'idle' state by the hypervisor and kept in that state for * an unspecified amount of time. */ local_irq_save(flags); /* - * Only issue the rdmsrl() when the lock state has not changed. + * Only issue the rdmsrq() when the lock state has not changed. */ if (READ_ONCE(*byte) == val) { unsigned long msr_val; - rdmsrl(HV_X64_MSR_GUEST_IDLE, msr_val); + rdmsrq(HV_X64_MSR_GUEST_IDLE, msr_val); (void)msr_val; } diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index 5c7de79423b8..042e8712d8de 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -11,7 +11,9 @@ #include <asm/desc.h> #include <asm/i8259.h> #include <asm/mshyperv.h> +#include <asm/msr.h> #include <asm/realmode.h> +#include <asm/reboot.h> #include <../kernel/smpboot.h> extern struct boot_params boot_params; @@ -22,19 +24,54 @@ static bool __init hv_vtl_msi_ext_dest_id(void) return true; } +/* + * The `native_machine_emergency_restart` function from `reboot.c` writes + * to the physical address 0x472 to indicate the type of reboot for the + * firmware. We cannot have that in VSM as the memory composition might + * be more generic, and such write effectively corrupts the memory thus + * making diagnostics harder at the very least. + */ +static void __noreturn hv_vtl_emergency_restart(void) +{ + /* + * Cause a triple fault and the immediate reset. Here the code does not run + * on the top of any firmware, whereby cannot reach out to its services. + * The inifinite loop is for the improbable case that the triple fault does + * not work and have to preserve the state intact for debugging. + */ + for (;;) { + idt_invalidate(); + __asm__ __volatile__("int3"); + } +} + +/* + * The only way to restart in the VTL mode is to triple fault as the kernel runs + * as firmware. + */ +static void __noreturn hv_vtl_restart(char __maybe_unused *cmd) +{ + hv_vtl_emergency_restart(); +} + void __init hv_vtl_init_platform(void) { - pr_info("Linux runs in Hyper-V Virtual Trust Level\n"); + /* + * This function is a no-op if the VTL mode is not enabled. + * If it is, this function runs if and only the kernel boots in + * VTL2 which the x86 hv initialization path makes sure of. + */ + pr_info("Linux runs in Hyper-V Virtual Trust Level %d\n", ms_hyperv.vtl); x86_platform.realmode_reserve = x86_init_noop; x86_platform.realmode_init = x86_init_noop; x86_init.irqs.pre_vector_init = x86_init_noop; x86_init.timers.timer_init = x86_init_noop; + x86_init.resources.probe_roms = x86_init_noop; /* Avoid searching for BIOS MP tables */ x86_init.mpparse.find_mptable = x86_init_noop; x86_init.mpparse.early_parse_smp_cfg = x86_init_noop; - x86_init.mpparse.parse_smp_cfg = x86_init_noop; x86_platform.get_wallclock = get_rtc_noop; x86_platform.set_wallclock = set_rtc_noop; @@ -118,11 +155,11 @@ static int hv_vtl_bringup_vcpu(u32 target_vp_index, int cpu, u64 eip_ignored) input->vp_context.rip = rip; input->vp_context.rsp = rsp; input->vp_context.rflags = 0x0000000000000002; - input->vp_context.efer = __rdmsr(MSR_EFER); + input->vp_context.efer = native_rdmsrq(MSR_EFER); input->vp_context.cr0 = native_read_cr0(); input->vp_context.cr3 = __native_read_cr3(); input->vp_context.cr4 = native_read_cr4(); - input->vp_context.msr_cr_pat = __rdmsr(MSR_IA32_CR_PAT); + input->vp_context.msr_cr_pat = native_rdmsrq(MSR_IA32_CR_PAT); input->vp_context.idtr.limit = idt_ptr.size; input->vp_context.idtr.base = idt_ptr.address; input->vp_context.gdtr.limit = gdt_ptr.size; @@ -175,67 +212,30 @@ free_lock: return ret; } -static int hv_vtl_apicid_to_vp_id(u32 apic_id) -{ - u64 control; - u64 status; - unsigned long irq_flags; - struct hv_get_vp_from_apic_id_in *input; - u32 *output, ret; - - local_irq_save(irq_flags); - - input = *this_cpu_ptr(hyperv_pcpu_input_arg); - memset(input, 0, sizeof(*input)); - input->partition_id = HV_PARTITION_ID_SELF; - input->apic_ids[0] = apic_id; - - output = (u32 *)input; - - control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_ID_FROM_APIC_ID; - status = hv_do_hypercall(control, input, output); - ret = output[0]; - - local_irq_restore(irq_flags); - - if (!hv_result_success(status)) { - pr_err("failed to get vp id from apic id %d, status %#llx\n", - apic_id, status); - return -EINVAL; - } - - return ret; -} - -static int hv_vtl_wakeup_secondary_cpu(u32 apicid, unsigned long start_eip) +static int hv_vtl_wakeup_secondary_cpu(u32 apicid, unsigned long start_eip, unsigned int cpu) { - int vp_id, cpu; - - /* Find the logical CPU for the APIC ID */ - for_each_present_cpu(cpu) { - if (arch_match_cpu_phys_id(cpu, apicid)) - break; - } - if (cpu >= nr_cpu_ids) - return -EINVAL; + int vp_index; pr_debug("Bringing up CPU with APIC ID %d in VTL2...\n", apicid); - vp_id = hv_vtl_apicid_to_vp_id(apicid); + vp_index = hv_apicid_to_vp_index(apicid); - if (vp_id < 0) { + if (vp_index < 0) { pr_err("Couldn't find CPU with APIC ID %d\n", apicid); return -EINVAL; } - if (vp_id > ms_hyperv.max_vp_index) { - pr_err("Invalid CPU id %d for APIC ID %d\n", vp_id, apicid); + if (vp_index > ms_hyperv.max_vp_index) { + pr_err("Invalid CPU id %d for APIC ID %d\n", vp_index, apicid); return -EINVAL; } - return hv_vtl_bringup_vcpu(vp_id, cpu, start_eip); + return hv_vtl_bringup_vcpu(vp_index, cpu, start_eip); } int __init hv_vtl_early_init(void) { + machine_ops.emergency_restart = hv_vtl_emergency_restart; + machine_ops.restart = hv_vtl_restart; + /* * `boot_cpu_has` returns the runtime feature support, * and here is the earliest it can be used. diff --git a/arch/x86/hyperv/irqdomain.c b/arch/x86/hyperv/irqdomain.c index 3215a4a07408..31f0d29cbc5e 100644 --- a/arch/x86/hyperv/irqdomain.c +++ b/arch/x86/hyperv/irqdomain.c @@ -64,7 +64,7 @@ static int hv_map_interrupt(union hv_device_id device_id, bool level, local_irq_restore(flags); if (!hv_result_success(status)) - pr_err("%s: hypercall failed, status %lld\n", __func__, status); + hv_status_err(status, "\n"); return hv_result(status); } @@ -224,7 +224,7 @@ static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) kfree(stored_entry); if (status != HV_STATUS_SUCCESS) { - pr_debug("%s: failed to unmap, status %lld", __func__, status); + hv_status_debug(status, "failed to unmap\n"); return; } } @@ -273,7 +273,7 @@ static void hv_teardown_msi_irq(struct pci_dev *dev, struct irq_data *irqd) status = hv_unmap_msi_interrupt(dev, &old_entry); if (status != HV_STATUS_SUCCESS) - pr_err("%s: hypercall failed, status %lld\n", __func__, status); + hv_status_err(status, "\n"); } static void hv_msi_free_irq(struct irq_domain *domain, @@ -304,7 +304,7 @@ static struct irq_chip hv_pci_msi_controller = { .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_compose_msi_msg = hv_irq_compose_msi_msg, .irq_set_affinity = msi_domain_set_affinity, - .flags = IRQCHIP_SKIP_SET_WAKE, + .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MOVE_DEFERRED, }; static struct msi_domain_ops pci_msi_domain_ops = { diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index 768d73de0d09..e93a2f488ff7 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -7,9 +7,9 @@ */ #include <linux/bitfield.h> -#include <linux/hyperv.h> #include <linux/types.h> #include <linux/slab.h> +#include <linux/cpu.h> #include <asm/svm.h> #include <asm/sev.h> #include <asm/io.h> @@ -23,6 +23,7 @@ #include <asm/realmode.h> #include <asm/e820/api.h> #include <asm/desc.h> +#include <asm/msr.h> #include <uapi/asm/vmx.h> #ifdef CONFIG_AMD_MEM_ENCRYPT @@ -111,12 +112,12 @@ u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size) static inline u64 rd_ghcb_msr(void) { - return __rdmsr(MSR_AMD64_SEV_ES_GHCB); + return native_rdmsrq(MSR_AMD64_SEV_ES_GHCB); } static inline void wr_ghcb_msr(u64 val) { - native_wrmsrl(MSR_AMD64_SEV_ES_GHCB, val); + native_wrmsrq(MSR_AMD64_SEV_ES_GHCB, val); } static enum es_result hv_ghcb_hv_call(struct ghcb *ghcb, u64 exit_code, @@ -289,7 +290,7 @@ static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa) free_page((unsigned long)vmsa); } -int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) +int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip, unsigned int cpu) { struct sev_es_save_area *vmsa = (struct sev_es_save_area *) __get_free_page(GFP_KERNEL | __GFP_ZERO); @@ -298,10 +299,16 @@ int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) u64 ret, retry = 5; struct hv_enable_vp_vtl *start_vp_input; unsigned long flags; + int vp_index; if (!vmsa) return -ENOMEM; + /* Find the Hyper-V VP index which might be not the same as APIC ID */ + vp_index = hv_apicid_to_vp_index(apic_id); + if (vp_index < 0 || vp_index > ms_hyperv.max_vp_index) + return -EINVAL; + native_store_gdt(&gdtr); vmsa->gdtr.base = gdtr.address; @@ -321,9 +328,9 @@ int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) vmsa->efer = native_read_msr(MSR_EFER); - asm volatile("movq %%cr4, %%rax;" : "=a" (vmsa->cr4)); - asm volatile("movq %%cr3, %%rax;" : "=a" (vmsa->cr3)); - asm volatile("movq %%cr0, %%rax;" : "=a" (vmsa->cr0)); + vmsa->cr4 = native_read_cr4(); + vmsa->cr3 = __native_read_cr3(); + vmsa->cr0 = native_read_cr0(); vmsa->xcr0 = 1; vmsa->g_pat = HV_AP_INIT_GPAT_DEFAULT; @@ -339,7 +346,7 @@ int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) vmsa->sev_features = sev_status >> 2; ret = snp_set_vmsa(vmsa, true); - if (!ret) { + if (ret) { pr_err("RMPADJUST(%llx) failed: %llx\n", (u64)vmsa, ret); free_page((u64)vmsa); return ret; @@ -349,7 +356,7 @@ int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) start_vp_input = (struct hv_enable_vp_vtl *)ap_start_input_arg; memset(start_vp_input, 0, sizeof(*start_vp_input)); start_vp_input->partition_id = -1; - start_vp_input->vp_index = cpu; + start_vp_input->vp_index = vp_index; start_vp_input->target_vtl.target_vtl = ms_hyperv.vtl; *(u64 *)&start_vp_input->vp_context = __pa(vmsa) | 1; @@ -465,7 +472,6 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[], enum hv_mem_host_visibility visibility) { struct hv_gpa_range_for_visibility *input; - u16 pages_processed; u64 hv_status; unsigned long flags; @@ -494,7 +500,7 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[], memcpy((void *)input->gpa_page_list, pfn, count * sizeof(*pfn)); hv_status = hv_do_rep_hypercall( HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY, count, - 0, input, &pages_processed); + 0, input, NULL); local_irq_restore(flags); if (hv_result_success(hv_status)) @@ -523,9 +529,9 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[], * transition is complete, hv_vtom_set_host_visibility() marks the pages * as "present" again. */ -static bool hv_vtom_clear_present(unsigned long kbuffer, int pagecount, bool enc) +static int hv_vtom_clear_present(unsigned long kbuffer, int pagecount, bool enc) { - return !set_memory_np(kbuffer, pagecount); + return set_memory_np(kbuffer, pagecount); } /* @@ -536,20 +542,19 @@ static bool hv_vtom_clear_present(unsigned long kbuffer, int pagecount, bool enc * with host. This function works as wrap of hv_mark_gpa_visibility() * with memory base and size. */ -static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bool enc) +static int hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bool enc) { enum hv_mem_host_visibility visibility = enc ? VMBUS_PAGE_NOT_VISIBLE : VMBUS_PAGE_VISIBLE_READ_WRITE; u64 *pfn_array; phys_addr_t paddr; + int i, pfn, err; void *vaddr; int ret = 0; - bool result = true; - int i, pfn; pfn_array = kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL); if (!pfn_array) { - result = false; + ret = -ENOMEM; goto err_set_memory_p; } @@ -568,10 +573,8 @@ static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bo if (pfn == HV_MAX_MODIFY_GPA_REP_COUNT || i == pagecount - 1) { ret = hv_mark_gpa_visibility(pfn, pfn_array, visibility); - if (ret) { - result = false; + if (ret) goto err_free_pfn_array; - } pfn = 0; } } @@ -586,10 +589,11 @@ err_set_memory_p: * order to avoid leaving the memory range in a "broken" state. Setting * the PRESENT bits shouldn't fail, but return an error if it does. */ - if (set_memory_p(kbuffer, pagecount)) - result = false; + err = set_memory_p(kbuffer, pagecount); + if (err && !ret) + ret = err; - return result; + return ret; } static bool hv_vtom_tlb_flush_required(bool private) @@ -666,7 +670,7 @@ void __init hv_vtom_init(void) x86_platform.guest.enc_status_change_finish = hv_vtom_set_host_visibility; /* Set WB as the default cache mode. */ - mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK); + guest_force_mtrr_state(NULL, 0, MTRR_TYPE_WRBACK); } #endif /* defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) */ diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index 1cc113200ff5..cfcb60468b01 100644 --- a/arch/x86/hyperv/mmu.c +++ b/arch/x86/hyperv/mmu.c @@ -1,6 +1,5 @@ #define pr_fmt(fmt) "Hyper-V: " fmt -#include <linux/hyperv.h> #include <linux/log2.h> #include <linux/slab.h> #include <linux/types.h> @@ -206,6 +205,10 @@ static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus, /* * We can flush not more than max_gvas with one hypercall. Flush the * whole address space if we were asked to do more. + * + * For these hypercalls, Hyper-V treats the valid_bank_mask field + * of flush->hv_vp_set as part of the fixed size input header. + * So the variable input header size is equal to nr_bank. */ max_gvas = (PAGE_SIZE - sizeof(*flush) - nr_bank * @@ -240,5 +243,4 @@ void hyperv_setup_mmu_ops(void) pr_info("Using hypercall for remote TLB flush\n"); pv_ops.mmu.flush_tlb_multi = hyperv_flush_tlb_multi; - pv_ops.mmu.tlb_remove_table = tlb_remove_table; } diff --git a/arch/x86/hyperv/nested.c b/arch/x86/hyperv/nested.c index 9dc259fa322e..1083dc8646f9 100644 --- a/arch/x86/hyperv/nested.c +++ b/arch/x86/hyperv/nested.c @@ -11,7 +11,7 @@ #include <linux/types.h> -#include <asm/hyperv-tlfs.h> +#include <hyperv/hvhdk.h> #include <asm/mshyperv.h> #include <asm/tlbflush.h> |