author    Paolo Bonzini <pbonzini@redhat.com>  2021-07-08 13:15:57 -0400
committer Paolo Bonzini <pbonzini@redhat.com>  2021-07-14 12:14:27 -0400
commit    f3cf800778e9e76b2387d00c9bfbc2e16efdb7ed
tree      783e27546a8d65df254f59b5a024dc42eb390582 /arch/x86
parent    Merge tag 'kvmarm-5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
parent    KVM: selftests: do not require 64GB in set_memory_region_test
Merge tag 'kvm-s390-master-5.14-1' of git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into HEAD
KVM: selftests: Fixes
 - provide memory model for IBM z196 and zEC12
 - do not require 64GB of memory
Diffstat (limited to 'arch/x86')
 arch/x86/Makefile                        |  5
 arch/x86/events/intel/uncore_snbep.c     |  9
 arch/x86/include/asm/apic.h              |  1
 arch/x86/include/asm/disabled-features.h |  7
 arch/x86/include/asm/fpu/api.h           |  6
 arch/x86/include/asm/fpu/internal.h      | 20
 arch/x86/include/asm/thermal.h           |  4
 arch/x86/kernel/alternative.c            | 64
 arch/x86/kernel/apic/apic.c              |  1
 arch/x86/kernel/apic/vector.c            | 20
 arch/x86/kernel/cpu/perfctr-watchdog.c   |  4
 arch/x86/kernel/cpu/sgx/virt.c           |  1
 arch/x86/kernel/fpu/signal.c             | 54
 arch/x86/kernel/fpu/xstate.c             | 57
 arch/x86/kernel/setup.c                  | 44
 arch/x86/mm/fault.c                      |  4
 arch/x86/mm/ioremap.c                    |  4
 arch/x86/mm/mem_encrypt_identity.c       | 11
 arch/x86/mm/numa.c                       |  8
 arch/x86/pci/fixup.c                     | 44
 arch/x86/platform/efi/quirks.c           | 12
 arch/x86/realmode/init.c                 | 14
 22 files changed, 243 insertions(+), 151 deletions(-)
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 307529417021..cb5e8d39cac1 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -200,8 +200,9 @@ endif
KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE)
ifdef CONFIG_LTO_CLANG
-KBUILD_LDFLAGS += -plugin-opt=-code-model=kernel \
- -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8)
+ifeq ($(shell test $(CONFIG_LLD_VERSION) -lt 130000; echo $$?),0)
+KBUILD_LDFLAGS += -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8)
+endif
endif
ifdef CONFIG_X86_NEED_RELOCS
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 63f097289a84..3a75a2c601c2 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -1406,6 +1406,8 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool
die_id = i;
else
die_id = topology_phys_to_logical_pkg(i);
+ if (die_id < 0)
+ die_id = -ENODEV;
map->pbus_to_dieid[bus] = die_id;
break;
}
@@ -1452,14 +1454,14 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool
i = -1;
if (reverse) {
for (bus = 255; bus >= 0; bus--) {
- if (map->pbus_to_dieid[bus] >= 0)
+ if (map->pbus_to_dieid[bus] != -1)
i = map->pbus_to_dieid[bus];
else
map->pbus_to_dieid[bus] = i;
}
} else {
for (bus = 0; bus <= 255; bus++) {
- if (map->pbus_to_dieid[bus] >= 0)
+ if (map->pbus_to_dieid[bus] != -1)
i = map->pbus_to_dieid[bus];
else
map->pbus_to_dieid[bus] = i;
@@ -5097,9 +5099,10 @@ static struct intel_uncore_type icx_uncore_m2m = {
.perf_ctr = SNR_M2M_PCI_PMON_CTR0,
.event_ctl = SNR_M2M_PCI_PMON_CTL0,
.event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .event_mask_ext = SNR_M2M_PCI_PMON_UMASK_EXT,
.box_ctl = SNR_M2M_PCI_PMON_BOX_CTL,
.ops = &snr_m2m_uncore_pci_ops,
- .format_group = &skx_uncore_format_group,
+ .format_group = &snr_m2m_uncore_format_group,
};
static struct attribute *icx_upi_uncore_formats_attr[] = {
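
Why the backfill checks above changed from ">= 0" to "!= -1": a pbus_to_dieid slot can now hold three kinds of values: a valid die id (>= 0), the -1 "unset" sentinel, or a negative error such as -ENODEV stored by the hunk at line 1406. Only unset slots may be backfilled from a neighbouring bus; a ">= 0" test would treat a stored error as unset and clobber it. A minimal userspace sketch of that sentinel logic (illustrative, not part of this patch):

#include <stdio.h>

#define UNSET  (-1)
#define ENODEV 19	/* stand-in for the kernel's errno value */

static void backfill(int *map, int n)
{
	int last = UNSET;

	for (int bus = 0; bus < n; bus++) {
		if (map[bus] != UNSET)	/* a real id or a stored error */
			last = map[bus];
		else
			map[bus] = last;	/* only unset slots inherit */
	}
}

int main(void)
{
	int map[5] = { 0, UNSET, -ENODEV, UNSET, 2 };

	backfill(map, 5);
	for (int bus = 0; bus < 5; bus++)
		printf("bus %d -> die %d\n", bus, map[bus]);
	/* bus 1 inherits 0; bus 3 inherits -ENODEV instead of a fake id */
	return 0;
}
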
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 412b51e059c8..48067af94678 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -174,6 +174,7 @@ static inline int apic_is_clustered_box(void)
extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask);
extern void lapic_assign_system_vectors(void);
extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace);
+extern void lapic_update_legacy_vectors(void);
extern void lapic_online(void);
extern void lapic_offline(void);
extern bool apic_needs_pit(void);
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index b7dd944dc867..8f28fafa98b3 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -56,11 +56,8 @@
# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
#endif
-#ifdef CONFIG_IOMMU_SUPPORT
-# define DISABLE_ENQCMD 0
-#else
-# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
-#endif
+/* Force disable because it's broken beyond repair */
+#define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
#ifdef CONFIG_X86_SGX
# define DISABLE_SGX 0
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index ed33a14188f6..23bef08a8388 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -106,10 +106,6 @@ extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name);
*/
#define PASID_DISABLED 0
-#ifdef CONFIG_IOMMU_SUPPORT
-/* Update current's PASID MSR/state by mm's PASID. */
-void update_pasid(void);
-#else
static inline void update_pasid(void) { }
-#endif
+
#endif /* _ASM_X86_FPU_API_H */
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 8d33ad80704f..fdee23ea4e17 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -578,19 +578,19 @@ static inline void switch_fpu_finish(struct fpu *new_fpu)
* PKRU state is switched eagerly because it needs to be valid before we
* return to userland e.g. for a copy_to_user() operation.
*/
- if (current->mm) {
+ if (!(current->flags & PF_KTHREAD)) {
+ /*
+ * If the PKRU bit in xsave.header.xfeatures is not set,
+ * then the PKRU component was in init state, which means
+ * XRSTOR will set PKRU to 0. In that case get_xsave_addr()
+ * returns NULL because the PKRU value in memory is not
+ * valid, so pkru_val has to be set to 0 rather than to
+ * init_pkru_value.
+ */
pk = get_xsave_addr(&new_fpu->state.xsave, XFEATURE_PKRU);
- if (pk)
- pkru_val = pk->pkru;
+ pkru_val = pk ? pk->pkru : 0;
}
__write_pkru(pkru_val);
-
- /*
- * Expensive PASID MSR write will be avoided in update_pasid() because
- * TIF_NEED_FPU_LOAD was set. And the PASID state won't be updated
- * unless it's different from mm->pasid to reduce overhead.
- */
- update_pasid();
}
#endif /* _ASM_X86_FPU_INTERNAL_H */
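
The comment block above encodes a subtle XSAVE rule: a component whose bit is clear in xsave.header.xfeatures is in its init state, so the copy of it in the save buffer is stale and get_xsave_addr() returns NULL; the caller must then use 0 (what XRSTOR would produce), not the stale buffer contents. A small userspace sketch of that rule, using hypothetical stand-in types rather than the kernel's structures:

#include <stdint.h>
#include <stdio.h>

#define XFEATURE_MASK_PKRU	(1ULL << 9)	/* PKRU is xfeature 9 */

struct fake_xsave {
	uint64_t xfeatures;
	uint32_t pkru;		/* valid only if the PKRU bit is set */
};

/* Mimics get_xsave_addr(): NULL when the component is in init state. */
static const uint32_t *fake_get_xsave_addr(const struct fake_xsave *x)
{
	return (x->xfeatures & XFEATURE_MASK_PKRU) ? &x->pkru : NULL;
}

int main(void)
{
	struct fake_xsave a = { .xfeatures = XFEATURE_MASK_PKRU, .pkru = 0x55555554 };
	struct fake_xsave b = { .xfeatures = 0, .pkru = 0xdeadbeef };	/* stale */
	const uint32_t *pk;

	pk = fake_get_xsave_addr(&a);
	printf("a: pkru = %#x\n", pk ? *pk : 0);	/* stored value */

	pk = fake_get_xsave_addr(&b);
	printf("b: pkru = %#x\n", pk ? *pk : 0);	/* 0, matching XRSTOR */
	return 0;
}
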
diff --git a/arch/x86/include/asm/thermal.h b/arch/x86/include/asm/thermal.h
index ddbdefd5b94f..91a7b6687c3b 100644
--- a/arch/x86/include/asm/thermal.h
+++ b/arch/x86/include/asm/thermal.h
@@ -3,11 +3,13 @@
#define _ASM_X86_THERMAL_H
#ifdef CONFIG_X86_THERMAL_VECTOR
+void therm_lvt_init(void);
void intel_init_thermal(struct cpuinfo_x86 *c);
bool x86_thermal_enabled(void);
void intel_thermal_interrupt(void);
#else
-static inline void intel_init_thermal(struct cpuinfo_x86 *c) { }
+static inline void therm_lvt_init(void) { }
+static inline void intel_init_thermal(struct cpuinfo_x86 *c) { }
#endif
#endif /* _ASM_X86_THERMAL_H */
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 6974b5174495..6fe5b44fcbc9 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -183,41 +183,69 @@ done:
}
/*
+ * optimize_nops_range() - Optimize a sequence of single byte NOPs (0x90)
+ *
+ * @instr: instruction byte stream
+ * @instrlen: length of the above
+ * @off: offset within @instr where the first NOP has been detected
+ *
+ * Return: number of NOPs found (and replaced).
+ */
+static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off)
+{
+ unsigned long flags;
+ int i = off, nnops;
+
+ while (i < instrlen) {
+ if (instr[i] != 0x90)
+ break;
+
+ i++;
+ }
+
+ nnops = i - off;
+
+ if (nnops <= 1)
+ return nnops;
+
+ local_irq_save(flags);
+ add_nops(instr + off, nnops);
+ local_irq_restore(flags);
+
+ DUMP_BYTES(instr, instrlen, "%px: [%d:%d) optimized NOPs: ", instr, off, i);
+
+ return nnops;
+}
+
+/*
* "noinline" to cause control flow change and thus invalidate I$ and
* cause refetch after modification.
*/
static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
{
- unsigned long flags;
struct insn insn;
- int nop, i = 0;
+ int i = 0;
/*
- * Jump over the non-NOP insns, the remaining bytes must be single-byte
- * NOPs, optimize them.
+ * Jump over the non-NOP insns and optimize single-byte NOPs into bigger
+ * ones.
*/
for (;;) {
if (insn_decode_kernel(&insn, &instr[i]))
return;
+ /*
+ * See if this and any potentially following NOPs can be
+ * optimized.
+ */
if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
- break;
-
- if ((i += insn.length) >= a->instrlen)
- return;
- }
+ i += optimize_nops_range(instr, a->instrlen, i);
+ else
+ i += insn.length;
- for (nop = i; i < a->instrlen; i++) {
- if (WARN_ONCE(instr[i] != 0x90, "Not a NOP at 0x%px\n", &instr[i]))
+ if (i >= a->instrlen)
return;
}
-
- local_irq_save(flags);
- add_nops(instr + nop, i - nop);
- local_irq_restore(flags);
-
- DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ",
- instr, nop, a->instrlen);
}
/*
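
A standalone sketch (illustrative, not the kernel function) of the scan optimize_nops_range() performs above: starting at an offset where a 0x90 has already been decoded, count the run of single-byte NOPs; only runs of two or more are worth rewriting, via add_nops() in the real code, into fewer, longer NOP instructions.

#include <stdio.h>

/* Count the run of single-byte NOPs (0x90) starting at @off. */
static int nop_run_length(const unsigned char *instr, int instrlen, int off)
{
	int i = off;

	while (i < instrlen && instr[i] == 0x90)
		i++;

	return i - off;
}

int main(void)
{
	/* push %rbp; three NOPs; ret */
	const unsigned char buf[] = { 0x55, 0x90, 0x90, 0x90, 0xc3 };
	int n = nop_run_length(buf, sizeof(buf), 1);

	printf("found %d single-byte NOPs at offset 1\n", n);	/* 3 */
	return 0;
}
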
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 4a39fb429f15..d262811ce14b 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2604,6 +2604,7 @@ static void __init apic_bsp_setup(bool upmode)
end_local_APIC_setup();
irq_remap_enable_fault_handling();
setup_IO_APIC();
+ lapic_update_legacy_vectors();
}
#ifdef CONFIG_UP_LATE_INIT
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 6dbdc7c22bb7..fb67ed5e7e6a 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -738,6 +738,26 @@ void lapic_assign_legacy_vector(unsigned int irq, bool replace)
irq_matrix_assign_system(vector_matrix, ISA_IRQ_VECTOR(irq), replace);
}
+void __init lapic_update_legacy_vectors(void)
+{
+ unsigned int i;
+
+ if (IS_ENABLED(CONFIG_X86_IO_APIC) && nr_ioapics > 0)
+ return;
+
+ /*
+ * If the IO/APIC is disabled via config, kernel command line or
+ * lack of enumeration then all legacy interrupts are routed
+ * through the PIC. Make sure that they are marked as legacy
+ * vectors. PIC_CASCADE_IR has already been marked in
+ * lapic_assign_system_vectors().
+ */
+ for (i = 0; i < nr_legacy_irqs(); i++) {
+ if (i != PIC_CASCADE_IR)
+ lapic_assign_legacy_vector(i, true);
+ }
+}
+
void __init lapic_assign_system_vectors(void)
{
unsigned int i, vector = 0;
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 3ef5868ac588..7aecb2fc3186 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -63,7 +63,7 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
case 15:
return msr - MSR_P4_BPU_PERFCTR0;
}
- fallthrough;
+ break;
case X86_VENDOR_ZHAOXIN:
case X86_VENDOR_CENTAUR:
return msr - MSR_ARCH_PERFMON_PERFCTR0;
@@ -96,7 +96,7 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
case 15:
return msr - MSR_P4_BSU_ESCR0;
}
- fallthrough;
+ break;
case X86_VENDOR_ZHAOXIN:
case X86_VENDOR_CENTAUR:
return msr - MSR_ARCH_PERFMON_EVENTSEL0;
diff --git a/arch/x86/kernel/cpu/sgx/virt.c b/arch/x86/kernel/cpu/sgx/virt.c
index 6ad165a5c0cc..64511c4a5200 100644
--- a/arch/x86/kernel/cpu/sgx/virt.c
+++ b/arch/x86/kernel/cpu/sgx/virt.c
@@ -212,6 +212,7 @@ static int sgx_vepc_release(struct inode *inode, struct file *file)
list_splice_tail(&secs_pages, &zombie_secs_pages);
mutex_unlock(&zombie_secs_pages_lock);
+ xa_destroy(&vepc->page_array);
kfree(vepc);
return 0;
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index a4ec65317a7f..ec3ae3054792 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -307,13 +307,17 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
return 0;
}
- if (!access_ok(buf, size))
- return -EACCES;
+ if (!access_ok(buf, size)) {
+ ret = -EACCES;
+ goto out;
+ }
- if (!static_cpu_has(X86_FEATURE_FPU))
- return fpregs_soft_set(current, NULL,
- 0, sizeof(struct user_i387_ia32_struct),
- NULL, buf) != 0;
+ if (!static_cpu_has(X86_FEATURE_FPU)) {
+ ret = fpregs_soft_set(current, NULL, 0,
+ sizeof(struct user_i387_ia32_struct),
+ NULL, buf);
+ goto out;
+ }
if (use_xsave()) {
struct _fpx_sw_bytes fx_sw_user;
@@ -369,6 +373,25 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
fpregs_unlock();
return 0;
}
+
+ /*
+ * The above did an FPU restore operation, restricted to
+ * the user portion of the registers, and failed, but the
+ * microcode might have modified the FPU registers
+ * nevertheless.
+ *
+ * If the FPU registers do not belong to current, then
+ * invalidate the FPU register state otherwise the task might
+ * preempt current and return to user space with corrupted
+ * FPU registers.
+ *
+ * In case current owns the FPU registers then no further
+ * action is required. The fixup below will handle it
+ * correctly.
+ */
+ if (test_thread_flag(TIF_NEED_FPU_LOAD))
+ __cpu_invalidate_fpregs_state();
+
fpregs_unlock();
} else {
/*
@@ -377,7 +400,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
*/
ret = __copy_from_user(&env, buf, sizeof(env));
if (ret)
- goto err_out;
+ goto out;
envp = &env;
}
@@ -405,16 +428,9 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
if (use_xsave() && !fx_only) {
u64 init_bv = xfeatures_mask_user() & ~user_xfeatures;
- if (using_compacted_format()) {
- ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
- } else {
- ret = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
-
- if (!ret && state_size > offsetof(struct xregs_state, header))
- ret = validate_user_xstate_header(&fpu->state.xsave.header);
- }
+ ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
if (ret)
- goto err_out;
+ goto out;
sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures,
fx_only);
@@ -434,7 +450,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
ret = __copy_from_user(&fpu->state.fxsave, buf_fx, state_size);
if (ret) {
ret = -EFAULT;
- goto err_out;
+ goto out;
}
sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures,
@@ -452,7 +468,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
} else {
ret = __copy_from_user(&fpu->state.fsave, buf_fx, state_size);
if (ret)
- goto err_out;
+ goto out;
fpregs_lock();
ret = copy_kernel_to_fregs_err(&fpu->state.fsave);
@@ -463,7 +479,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
fpregs_deactivate(fpu);
fpregs_unlock();
-err_out:
+out:
if (ret)
fpu__clear_user_states(fpu);
return ret;
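
The churn from err_out to out above is more than a rename: every failure path, including the formerly early returns for -EACCES and for the no-FPU fpregs_soft_set() case, now funnels through a single exit label, so the fpu__clear_user_states() cleanup can never be bypassed. A schematic sketch of that single-exit pattern, with hypothetical error values standing in for the real function's paths:

#include <stdio.h>

static void cleanup(void)
{
	puts("cleanup ran");	/* stands in for fpu__clear_user_states() */
}

static int restore(int fail_early, int fail_late)
{
	int ret = 0;

	if (fail_early) {
		ret = -13;	/* like the -EACCES path */
		goto out;
	}

	if (fail_late) {
		ret = -14;	/* like the -EFAULT path */
		goto out;
	}

out:
	if (ret)
		cleanup();	/* runs for every error, never skipped */
	return ret;
}

int main(void)
{
	printf("%d\n", restore(1, 0));
	printf("%d\n", restore(0, 1));
	printf("%d\n", restore(0, 0));
	return 0;
}
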
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index a85c64000218..d0eef963aad1 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1402,60 +1402,3 @@ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
return 0;
}
#endif /* CONFIG_PROC_PID_ARCH_STATUS */
-
-#ifdef CONFIG_IOMMU_SUPPORT
-void update_pasid(void)
-{
- u64 pasid_state;
- u32 pasid;
-
- if (!cpu_feature_enabled(X86_FEATURE_ENQCMD))
- return;
-
- if (!current->mm)
- return;
-
- pasid = READ_ONCE(current->mm->pasid);
- /* Set the valid bit in the PASID MSR/state only for valid pasid. */
- pasid_state = pasid == PASID_DISABLED ?
- pasid : pasid | MSR_IA32_PASID_VALID;
-
- /*
- * No need to hold fregs_lock() since the task's fpstate won't
- * be changed by others (e.g. ptrace) while the task is being
- * switched to or is in IPI.
- */
- if (!test_thread_flag(TIF_NEED_FPU_LOAD)) {
- /* The MSR is active and can be directly updated. */
- wrmsrl(MSR_IA32_PASID, pasid_state);
- } else {
- struct fpu *fpu = &current->thread.fpu;
- struct ia32_pasid_state *ppasid_state;
- struct xregs_state *xsave;
-
- /*
- * The CPU's xstate registers are not currently active. Just
- * update the PASID state in the memory buffer here. The
- * PASID MSR will be loaded when returning to user mode.
- */
- xsave = &fpu->state.xsave;
- xsave->header.xfeatures |= XFEATURE_MASK_PASID;
- ppasid_state = get_xsave_addr(xsave, XFEATURE_PASID);
- /*
- * Since XFEATURE_MASK_PASID is set in xfeatures, ppasid_state
- * won't be NULL and no need to check its value.
- *
- * Only update the task's PASID state when it's different
- * from the mm's pasid.
- */
- if (ppasid_state->pasid != pasid_state) {
- /*
- * Invalid fpregs so that state restoring will pick up
- * the PASID state.
- */
- __fpu_invalidate_fpregs_state(fpu);
- ppasid_state->pasid = pasid_state;
- }
- }
-}
-#endif /* CONFIG_IOMMU_SUPPORT */
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 72920af0b3c0..1e720626069a 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -44,6 +44,7 @@
#include <asm/pci-direct.h>
#include <asm/prom.h>
#include <asm/proto.h>
+#include <asm/thermal.h>
#include <asm/unwind.h>
#include <asm/vsyscall.h>
#include <linux/vmalloc.h>
@@ -637,11 +638,11 @@ static void __init trim_snb_memory(void)
* them from accessing certain memory ranges, namely anything below
* 1M and in the pages listed in bad_pages[] above.
*
- * To avoid these pages being ever accessed by SNB gfx devices
- * reserve all memory below the 1 MB mark and bad_pages that have
- * not already been reserved at boot time.
+ * To avoid these pages ever being accessed by SNB gfx devices,
+ * reserve the bad_pages that have not already been reserved at
+ * boot time. All memory below the 1 MB mark is reserved later
+ * during setup_arch() anyway, so there is no need to reserve it
+ * here.
*/
- memblock_reserve(0, 1<<20);
for (i = 0; i < ARRAY_SIZE(bad_pages); i++) {
if (memblock_reserve(bad_pages[i], PAGE_SIZE))
@@ -733,14 +734,14 @@ static void __init early_reserve_memory(void)
* The first 4KB of memory is a BIOS-owned area, but generally it is
* not listed as such in the E820 table.
*
- * Reserve the first memory page and typically some additional
- * memory (64KiB by default) since some BIOSes are known to corrupt
- * low memory. See the Kconfig help text for X86_RESERVE_LOW.
+ * Reserve the first 64K of memory since some BIOSes are known to
+ * corrupt low memory. After the real mode trampoline is allocated,
+ * the rest of the memory below 640K is reserved.
*
* In addition, make sure page 0 is always reserved because on
* systems with L1TF its contents can be leaked to user processes.
*/
- memblock_reserve(0, ALIGN(reserve_low, PAGE_SIZE));
+ memblock_reserve(0, SZ_64K);
early_reserve_initrd();
@@ -751,6 +752,7 @@ static void __init early_reserve_memory(void)
reserve_ibft_region();
reserve_bios_regions();
+ trim_snb_memory();
}
/*
@@ -1081,14 +1083,20 @@ void __init setup_arch(char **cmdline_p)
(max_pfn_mapped<<PAGE_SHIFT) - 1);
#endif
- reserve_real_mode();
-
/*
- * Reserving memory causing GPU hangs on Sandy Bridge integrated
- * graphics devices should be done after we allocated memory under
- * 1M for the real mode trampoline.
+ * Find free memory for the real mode trampoline and place it there. If
+ * there is not enough free memory under 1M, on EFI-enabled systems
+ * there will be an additional attempt to reclaim the memory for the
+ * real mode trampoline at efi_free_boot_services().
+ *
+ * Unconditionally reserve the entire first 1M of RAM because BIOSes
+ * are known to corrupt low memory and several hundred kilobytes are
+ * not worth complex detection of what memory gets clobbered. Moreover,
+ * on machines with SandyBridge graphics or in setups that use
+ * crashkernel the entire 1M is reserved anyway.
*/
- trim_snb_memory();
+ reserve_real_mode();
init_mem_mapping();
@@ -1226,6 +1234,14 @@ void __init setup_arch(char **cmdline_p)
x86_init.timers.wallclock_init();
+ /*
+ * This needs to run before setup_local_APIC() which soft-disables the
+ * local APIC temporarily and that masks the thermal LVT interrupt,
+ * leading to softlockups on machines which have configured SMI
+ * interrupt delivery.
+ */
+ therm_lvt_init();
+
mcheck_init();
register_refined_jiffies(CLOCK_TICK_RATE);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 1c548ad00752..6bda7f67d737 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -836,8 +836,8 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
if (si_code == SEGV_PKUERR)
force_sig_pkuerr((void __user *)address, pkey);
-
- force_sig_fault(SIGSEGV, si_code, (void __user *)address);
+ else
+ force_sig_fault(SIGSEGV, si_code, (void __user *)address);
local_irq_disable();
}
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 12c686c65ea9..60ade7dd71bd 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -118,7 +118,9 @@ static void __ioremap_check_other(resource_size_t addr, struct ioremap_desc *des
if (!IS_ENABLED(CONFIG_EFI))
return;
- if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA)
+ if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA ||
+ (efi_mem_type(addr) == EFI_BOOT_SERVICES_DATA &&
+ efi_mem_attributes(addr) & EFI_MEMORY_RUNTIME))
desc->flags |= IORES_MAP_ENCRYPTED;
}
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index a9639f663d25..470b20208430 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -504,10 +504,6 @@ void __init sme_enable(struct boot_params *bp)
#define AMD_SME_BIT BIT(0)
#define AMD_SEV_BIT BIT(1)
- /* Check the SEV MSR whether SEV or SME is enabled */
- sev_status = __rdmsr(MSR_AMD64_SEV);
- feature_mask = (sev_status & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT;
-
/*
* Check for the SME/SEV feature:
* CPUID Fn8000_001F[EAX]
@@ -519,11 +515,16 @@ void __init sme_enable(struct boot_params *bp)
eax = 0x8000001f;
ecx = 0;
native_cpuid(&eax, &ebx, &ecx, &edx);
- if (!(eax & feature_mask))
+ /* Check whether SEV or SME is supported */
+ if (!(eax & (AMD_SEV_BIT | AMD_SME_BIT)))
return;
me_mask = 1UL << (ebx & 0x3f);
+ /* Check the SEV MSR whether SEV or SME is enabled */
+ sev_status = __rdmsr(MSR_AMD64_SEV);
+ feature_mask = (sev_status & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT;
+
/* Check if memory encryption is enabled */
if (feature_mask == AMD_SME_BIT) {
/*
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 5eb4dc2b97da..e94da744386f 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -254,7 +254,13 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
/* make sure all non-reserved blocks are inside the limits */
bi->start = max(bi->start, low);
- bi->end = min(bi->end, high);
+
+ /* preserve info for non-RAM areas above 'max_pfn': */
+ if (bi->end > high) {
+ numa_add_memblk_to(bi->nid, high, bi->end,
+ &numa_reserved_meminfo);
+ bi->end = high;
+ }
/* and there's no empty block */
if (bi->start >= bi->end)
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 02dc64625e64..2edd86649468 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -779,4 +779,48 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar);
DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar);
DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar);
+#define RS690_LOWER_TOP_OF_DRAM2 0x30
+#define RS690_LOWER_TOP_OF_DRAM2_VALID 0x1
+#define RS690_UPPER_TOP_OF_DRAM2 0x31
+#define RS690_HTIU_NB_INDEX 0xA8
+#define RS690_HTIU_NB_INDEX_WR_ENABLE 0x100
+#define RS690_HTIU_NB_DATA 0xAC
+
+/*
+ * Some BIOS implementations support RAM above 4GB, but do not configure the
+ * PCI host to respond to bus master accesses for these addresses. These
+ * implementations set the TOP_OF_DRAM_SLOT1 register correctly, so PCI DMA
+ * works as expected for addresses below 4GB.
+ *
+ * Reference: "AMD RS690 ASIC Family Register Reference Guide" (pg. 2-57)
+ * https://www.amd.com/system/files/TechDocs/43372_rs690_rrg_3.00o.pdf
+ */
+static void rs690_fix_64bit_dma(struct pci_dev *pdev)
+{
+ u32 val = 0;
+ phys_addr_t top_of_dram = __pa(high_memory - 1) + 1;
+
+ if (top_of_dram <= (1ULL << 32))
+ return;
+
+ pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX,
+ RS690_LOWER_TOP_OF_DRAM2);
+ pci_read_config_dword(pdev, RS690_HTIU_NB_DATA, &val);
+
+ if (val)
+ return;
+
+ pci_info(pdev, "Adjusting top of DRAM to %pa for 64-bit DMA support\n", &top_of_dram);
+
+ pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX,
+ RS690_UPPER_TOP_OF_DRAM2 | RS690_HTIU_NB_INDEX_WR_ENABLE);
+ pci_write_config_dword(pdev, RS690_HTIU_NB_DATA, top_of_dram >> 32);
+
+ pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX,
+ RS690_LOWER_TOP_OF_DRAM2 | RS690_HTIU_NB_INDEX_WR_ENABLE);
+ pci_write_config_dword(pdev, RS690_HTIU_NB_DATA,
+ top_of_dram | RS690_LOWER_TOP_OF_DRAM2_VALID);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7910, rs690_fix_64bit_dma);
+
#endif
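
rs690_fix_64bit_dma() above uses the classic index/data indirection: the HTIU block exposes many internal registers through just two PCI config dwords. The register number is written to the index dword (with an explicit write-enable bit for stores), then the data dword is read or written. A plain C stand-in of that access pattern, simulating the register file rather than touching real hardware:

#include <stdint.h>
#include <stdio.h>

#define WR_ENABLE 0x100	/* like RS690_HTIU_NB_INDEX_WR_ENABLE */

static uint32_t regs[256];	/* pretend internal register file */
static uint32_t index_port;

static void write_index(uint32_t v) { index_port = v; }

static uint32_t read_data(void)
{
	return regs[index_port & 0xff];
}

static void write_data(uint32_t v)
{
	if (index_port & WR_ENABLE)	/* stores need the enable bit */
		regs[index_port & 0xff] = v;
}

int main(void)
{
	write_index(0x31 | WR_ENABLE);	/* like RS690_UPPER_TOP_OF_DRAM2 */
	write_data(0x1);		/* upper bits of the DRAM limit */

	write_index(0x31);		/* read back without the enable bit */
	printf("reg 0x31 = %#x\n", read_data());
	return 0;
}
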
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 7850111008a8..b15ebfe40a73 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -450,6 +450,18 @@ void __init efi_free_boot_services(void)
size -= rm_size;
}
+ /*
+ * Don't free memory under 1M for two reasons:
+ * - BIOS might clobber it
+ * - Crash kernel needs it to be reserved
+ */
+ if (start + size < SZ_1M)
+ continue;
+ if (start < SZ_1M) {
+ size -= (SZ_1M - start);
+ start = SZ_1M;
+ }
+
memblock_free_late(start, size);
}
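
The hunk above clamps each freed boot-services region so that nothing below 1M is ever handed back to the allocator: a region entirely under 1M is skipped, and a region straddling the boundary has its head trimmed. The same arithmetic as a standalone sketch, with example regions:

#include <stdio.h>

#define SZ_1M (1UL << 20)

static void free_region(unsigned long start, unsigned long size)
{
	if (start + size < SZ_1M)
		return;			/* wholly below 1M: keep reserved */
	if (start < SZ_1M) {
		size -= SZ_1M - start;	/* trim the part below 1M */
		start = SZ_1M;
	}
	printf("freeing [%#lx, %#lx)\n", start, start + size);
}

int main(void)
{
	free_region(0x8000, 0x1000);	/* skipped entirely */
	free_region(0xf0000, 0x20000);	/* trimmed to start at 1M */
	free_region(0x200000, 0x10000);	/* freed as-is */
	return 0;
}
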
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index 2e1c1bec0f9e..6534c92d0f83 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -29,14 +29,16 @@ void __init reserve_real_mode(void)
/* Has to be under 1M so we can execute real-mode AP code. */
mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
- if (!mem) {
+ if (!mem)
pr_info("No sub-1M memory is available for the trampoline\n");
- return;
- }
+ else
+ set_real_mode_mem(mem);
- memblock_reserve(mem, size);
- set_real_mode_mem(mem);
- crash_reserve_low_1M();
+ /*
+ * Unconditionally reserve the entire first 1M, see comment in
+ * setup_arch().
+ */
+ memblock_reserve(0, SZ_1M);
}
static void sme_sev_setup_real_mode(struct trampoline_header *th)