aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt25
-rw-r--r--Documentation/core-api/kernel-api.rst2
-rw-r--r--Documentation/core-api/symbol-namespaces.rst4
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml8
-rw-r--r--Documentation/filesystems/overlayfs.rst4
-rw-r--r--Documentation/livepatch/module-elf-format.rst10
-rw-r--r--Documentation/sound/soc/dai.rst2
-rw-r--r--Documentation/translations/it_IT/core-api/symbol-namespaces.rst6
-rw-r--r--Documentation/translations/zh_CN/core-api/kernel-api.rst2
-rw-r--r--Documentation/translations/zh_CN/core-api/symbol-namespaces.rst2
-rw-r--r--arch/arm/include/asm/domain.h13
-rw-r--r--arch/arm/include/asm/mach/map.h1
-rw-r--r--arch/arm/include/asm/ptrace.h26
-rw-r--r--arch/arm/kernel/entry-common.S1
-rw-r--r--arch/arm/mm/Kconfig6
-rw-r--r--arch/arm/mm/alignment.c3
-rw-r--r--arch/arm/mm/mmu.c15
-rw-r--r--arch/arm/mm/proc-v7-bugs.c9
-rw-r--r--arch/arm/probes/decode.h26
-rw-r--r--arch/riscv/Kconfig2
-rw-r--r--arch/sh/include/asm/io.h8
-rw-r--r--arch/um/include/asm/page.h4
-rw-r--r--arch/um/include/shared/mem.h4
-rw-r--r--arch/um/os-Linux/skas/process.c6
-rw-r--r--arch/x86/Kconfig108
-rw-r--r--arch/x86/Makefile6
-rw-r--r--arch/x86/entry/Makefile2
-rw-r--r--arch/x86/entry/calling.h62
-rw-r--r--arch/x86/entry/entry.S22
-rw-r--r--arch/x86/entry/entry_32.S2
-rw-r--r--arch/x86/entry/entry_64.S64
-rw-r--r--arch/x86/entry/entry_64_compat.S21
-rw-r--r--arch/x86/entry/vdso/Makefile1
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_emu_64.S9
-rw-r--r--arch/x86/include/asm/alternative.h1
-rw-r--r--arch/x86/include/asm/cpufeatures.h12
-rw-r--r--arch/x86/include/asm/disabled-features.h21
-rw-r--r--arch/x86/include/asm/linkage.h8
-rw-r--r--arch/x86/include/asm/msr-index.h13
-rw-r--r--arch/x86/include/asm/nospec-branch.h68
-rw-r--r--arch/x86/include/asm/static_call.h19
-rw-r--r--arch/x86/include/asm/unwind_hints.h14
-rw-r--r--arch/x86/kernel/alternative.c69
-rw-r--r--arch/x86/kernel/asm-offsets.c6
-rw-r--r--arch/x86/kernel/cpu/amd.c46
-rw-r--r--arch/x86/kernel/cpu/bugs.c475
-rw-r--r--arch/x86/kernel/cpu/common.c61
-rw-r--r--arch/x86/kernel/cpu/cpu.h2
-rw-r--r--arch/x86/kernel/cpu/hygon.c6
-rw-r--r--arch/x86/kernel/cpu/scattered.c1
-rw-r--r--arch/x86/kernel/ftrace.c7
-rw-r--r--arch/x86/kernel/head_64.S5
-rw-r--r--arch/x86/kernel/module.c8
-rw-r--r--arch/x86/kernel/process.c2
-rw-r--r--arch/x86/kernel/relocate_kernel_32.S25
-rw-r--r--arch/x86/kernel/relocate_kernel_64.S23
-rw-r--r--arch/x86/kernel/static_call.c51
-rw-r--r--arch/x86/kernel/vmlinux.lds.S9
-rw-r--r--arch/x86/kvm/emulate.c28
-rw-r--r--arch/x86/kvm/svm/vmenter.S18
-rw-r--r--arch/x86/kvm/vmx/capabilities.h4
-rw-r--r--arch/x86/kvm/vmx/nested.c2
-rw-r--r--arch/x86/kvm/vmx/run_flags.h8
-rw-r--r--arch/x86/kvm/vmx/vmenter.S194
-rw-r--r--arch/x86/kvm/vmx/vmx.c84
-rw-r--r--arch/x86/kvm/vmx/vmx.h10
-rw-r--r--arch/x86/kvm/vmx/vmx_ops.h2
-rw-r--r--arch/x86/kvm/x86.c4
-rw-r--r--arch/x86/lib/memmove_64.S7
-rw-r--r--arch/x86/lib/retpoline.S79
-rw-r--r--arch/x86/mm/mem_encrypt_boot.S10
-rw-r--r--arch/x86/net/bpf_jit_comp.c26
-rw-r--r--arch/x86/xen/setup.c6
-rw-r--r--arch/x86/xen/xen-asm.S30
-rw-r--r--arch/x86/xen/xen-head.S1
-rw-r--r--arch/x86/xen/xen-ops.h6
-rw-r--r--drivers/amba/bus.c8
-rw-r--r--drivers/base/cpu.c8
-rw-r--r--drivers/dma-buf/dma-resv.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.c25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v10_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v11_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v6_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v8_0.c3
-rw-r--r--drivers/gpu/drm/amd/display/Kconfig2
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c3
-rw-r--r--drivers/gpu/drm/bridge/fsl-ldb.c21
-rw-r--r--drivers/gpu/drm/drm_panel_orientation_quirks.c15
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_mst.c1
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c9
-rw-r--r--drivers/gpu/drm/i915/i915_vma.c12
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_drv.c4
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_mmu.c2
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_drv.c17
-rw-r--r--drivers/gpu/drm/solomon/ssd130x.c2
-rw-r--r--drivers/idle/intel_idle.c44
-rw-r--r--drivers/vfio/vfio.c17
-rw-r--r--fs/btrfs/inode.c14
-rw-r--r--fs/btrfs/zoned.c34
-rw-r--r--fs/exec.c2
-rw-r--r--fs/lockd/svcsubs.c14
-rw-r--r--fs/nfsd/nfs4xdr.c9
-rw-r--r--fs/nfsd/nfsd.h3
-rw-r--r--fs/nilfs2/nilfs.h3
-rw-r--r--fs/overlayfs/super.c25
-rw-r--r--fs/remap_range.c5
-rw-r--r--include/linux/cgroup-defs.h3
-rw-r--r--include/linux/cpu.h2
-rw-r--r--include/linux/highmem.h18
-rw-r--r--include/linux/kexec.h6
-rw-r--r--include/linux/kvm_host.h2
-rw-r--r--include/linux/objtool.h9
-rw-r--r--include/linux/sched/task.h2
-rw-r--r--include/trace/events/iocost.h2
-rw-r--r--ipc/namespace.c5
-rw-r--r--kernel/cgroup/cgroup.c37
-rw-r--r--kernel/exit.c2
-rw-r--r--kernel/kexec_file.c11
-rw-r--r--kernel/module/internal.h13
-rw-r--r--kernel/module/kallsyms.c35
-rw-r--r--kernel/module/main.c9
-rw-r--r--kernel/time/posix-timers.c19
-rw-r--r--kernel/trace/Kconfig3
-rw-r--r--kernel/trace/trace.c11
-rw-r--r--kernel/trace/trace_events_hist.c2
-rw-r--r--mm/damon/vaddr.c3
-rw-r--r--mm/memory.c27
-rw-r--r--mm/rmap.c27
-rw-r--r--mm/sparse-vmemmap.c8
-rw-r--r--mm/userfaultfd.c5
-rw-r--r--samples/fprobe/fprobe_example.c2
-rw-r--r--samples/kprobes/kprobe_example.c5
-rw-r--r--samples/kprobes/kretprobe_example.c5
-rw-r--r--scripts/Makefile.lib1
-rw-r--r--scripts/Makefile.vmlinux_o2
-rw-r--r--security/Kconfig11
-rw-r--r--security/integrity/evm/evm_crypto.c7
-rw-r--r--security/integrity/ima/ima_appraise.c3
-rw-r--r--security/integrity/ima/ima_crypto.c1
-rw-r--r--security/integrity/ima/ima_efi.c2
-rw-r--r--security/integrity/ima/ima_template_lib.c6
-rw-r--r--sound/pci/hda/patch_conexant.c1
-rw-r--r--sound/pci/hda/patch_realtek.c20
-rw-r--r--sound/soc/codecs/arizona.c4
-rw-r--r--sound/soc/codecs/cs47l92.c8
-rw-r--r--sound/soc/codecs/max98396.c10
-rw-r--r--sound/soc/codecs/rt5640.c30
-rw-r--r--sound/soc/codecs/sgtl5000.c9
-rw-r--r--sound/soc/codecs/sgtl5000.h1
-rw-r--r--sound/soc/codecs/tas2764.c46
-rw-r--r--sound/soc/codecs/tas2764.h6
-rw-r--r--sound/soc/codecs/tlv320adcx140.c13
-rw-r--r--sound/soc/codecs/wcd9335.c17
-rw-r--r--sound/soc/codecs/wm5102.c21
-rw-r--r--sound/soc/codecs/wm8998.c21
-rw-r--r--sound/soc/generic/audio-graph-card2.c6
-rw-r--r--sound/soc/intel/boards/sof_rt5682.c10
-rw-r--r--sound/soc/intel/skylake/skl-nhlt.c40
-rw-r--r--sound/soc/qcom/qdsp6/q6apm.c1
-rw-r--r--sound/soc/ti/omap-mcbsp-priv.h2
-rw-r--r--sound/soc/ti/omap-mcbsp-st.c14
-rw-r--r--sound/soc/ti/omap-mcbsp.c19
-rw-r--r--tools/arch/x86/include/asm/msr-index.h9
-rw-r--r--tools/include/linux/objtool.h9
-rw-r--r--tools/objtool/arch/x86/decode.c5
-rw-r--r--tools/objtool/builtin-check.c13
-rw-r--r--tools/objtool/check.c330
-rw-r--r--tools/objtool/include/objtool/arch.h1
-rw-r--r--tools/objtool/include/objtool/builtin.h2
-rw-r--r--tools/objtool/include/objtool/check.h24
-rw-r--r--tools/objtool/include/objtool/elf.h1
-rw-r--r--tools/objtool/include/objtool/objtool.h1
-rw-r--r--tools/objtool/objtool.c1
174 files changed, 2499 insertions, 775 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 2522b11e593f..f2d26cb7e853 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5197,6 +5197,30 @@
retain_initrd [RAM] Keep initrd memory after extraction
+ retbleed= [X86] Control mitigation of RETBleed (Arbitrary
+ Speculative Code Execution with Return Instructions)
+ vulnerability.
+
+ off - no mitigation
+ auto - automatically select a migitation
+ auto,nosmt - automatically select a mitigation,
+ disabling SMT if necessary for
+ the full mitigation (only on Zen1
+ and older without STIBP).
+ ibpb - mitigate short speculation windows on
+ basic block boundaries too. Safe, highest
+ perf impact.
+ unret - force enable untrained return thunks,
+ only effective on AMD f15h-f17h
+ based systems.
+ unret,nosmt - like unret, will disable SMT when STIBP
+ is not available.
+
+ Selecting 'auto' will choose a mitigation method at run
+ time according to the CPU.
+
+ Not specifying this option is equivalent to retbleed=auto.
+
rfkill.default_state=
0 "airplane mode". All wifi, bluetooth, wimax, gps, fm,
etc. communication is blocked by default.
@@ -5568,6 +5592,7 @@
eibrs - enhanced IBRS
eibrs,retpoline - enhanced IBRS + Retpolines
eibrs,lfence - enhanced IBRS + LFENCE
+ ibrs - use IBRS to protect kernel
Not specifying this option is equivalent to
spectre_v2=auto.
diff --git a/Documentation/core-api/kernel-api.rst b/Documentation/core-api/kernel-api.rst
index d6b3f94b9f1f..0793c400d4b0 100644
--- a/Documentation/core-api/kernel-api.rst
+++ b/Documentation/core-api/kernel-api.rst
@@ -223,7 +223,7 @@ Module Loading
Inter Module support
--------------------
-Refer to the file kernel/module.c for more information.
+Refer to the files in kernel/module/ for more information.
Hardware Interfaces
===================
diff --git a/Documentation/core-api/symbol-namespaces.rst b/Documentation/core-api/symbol-namespaces.rst
index 5ad9e0abe42c..12e4aecdae94 100644
--- a/Documentation/core-api/symbol-namespaces.rst
+++ b/Documentation/core-api/symbol-namespaces.rst
@@ -51,8 +51,8 @@ namespace ``USB_STORAGE``, use::
The corresponding ksymtab entry struct ``kernel_symbol`` will have the member
``namespace`` set accordingly. A symbol that is exported without a namespace will
refer to ``NULL``. There is no default namespace if none is defined. ``modpost``
-and kernel/module.c make use the namespace at build time or module load time,
-respectively.
+and kernel/module/main.c make use the namespace at build time or module load
+time, respectively.
2.2 Using the DEFAULT_SYMBOL_NAMESPACE define
=============================================
diff --git a/Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml b/Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml
index e9a533080b32..ef18a572a1ff 100644
--- a/Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml
+++ b/Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml
@@ -25,12 +25,12 @@ properties:
- qcom,sc7280-lpass-cpu
reg:
- minItems: 2
+ minItems: 1
maxItems: 6
description: LPAIF core registers
reg-names:
- minItems: 2
+ minItems: 1
maxItems: 6
clocks:
@@ -42,12 +42,12 @@ properties:
maxItems: 10
interrupts:
- minItems: 2
+ minItems: 1
maxItems: 4
description: LPAIF DMA buffer interrupt
interrupt-names:
- minItems: 2
+ minItems: 1
maxItems: 4
qcom,adsp:
diff --git a/Documentation/filesystems/overlayfs.rst b/Documentation/filesystems/overlayfs.rst
index 7da6c30ed596..316cfd8b1891 100644
--- a/Documentation/filesystems/overlayfs.rst
+++ b/Documentation/filesystems/overlayfs.rst
@@ -466,6 +466,10 @@ overlay filesystem and the value of st_ino for filesystem objects may not be
persistent and could change even while the overlay filesystem is mounted, as
summarized in the `Inode properties`_ table above.
+4) "idmapped mounts"
+When the upper or lower layers are idmapped mounts overlayfs will be mounted
+without support for POSIX Access Control Lists (ACLs). This limitation will
+eventually be lifted.
Changes to underlying filesystems
---------------------------------
diff --git a/Documentation/livepatch/module-elf-format.rst b/Documentation/livepatch/module-elf-format.rst
index dbe9b400e39f..7347638895a0 100644
--- a/Documentation/livepatch/module-elf-format.rst
+++ b/Documentation/livepatch/module-elf-format.rst
@@ -210,11 +210,11 @@ module->symtab.
=====================================
Normally, a stripped down copy of a module's symbol table (containing only
"core" symbols) is made available through module->symtab (See layout_symtab()
-in kernel/module.c). For livepatch modules, the symbol table copied into memory
-on module load must be exactly the same as the symbol table produced when the
-patch module was compiled. This is because the relocations in each livepatch
-relocation section refer to their respective symbols with their symbol indices,
-and the original symbol indices (and thus the symtab ordering) must be
+in kernel/module/kallsyms.c). For livepatch modules, the symbol table copied
+into memory on module load must be exactly the same as the symbol table produced
+when the patch module was compiled. This is because the relocations in each
+livepatch relocation section refer to their respective symbols with their symbol
+indices, and the original symbol indices (and thus the symtab ordering) must be
preserved in order for apply_relocate_add() to find the right symbol.
For example, take this particular rela from a livepatch module:::
diff --git a/Documentation/sound/soc/dai.rst b/Documentation/sound/soc/dai.rst
index 009b07e5a0f3..bf8431386d26 100644
--- a/Documentation/sound/soc/dai.rst
+++ b/Documentation/sound/soc/dai.rst
@@ -10,7 +10,7 @@ AC97
====
AC97 is a five wire interface commonly found on many PC sound cards. It is
-now also popular in many portable devices. This DAI has a reset line and time
+now also popular in many portable devices. This DAI has a RESET line and time
multiplexes its data on its SDATA_OUT (playback) and SDATA_IN (capture) lines.
The bit clock (BCLK) is always driven by the CODEC (usually 12.288MHz) and the
frame (FRAME) (usually 48kHz) is always driven by the controller. Each AC97
diff --git a/Documentation/translations/it_IT/core-api/symbol-namespaces.rst b/Documentation/translations/it_IT/core-api/symbol-namespaces.rst
index 42f5d04e38ec..0f6898860d6d 100644
--- a/Documentation/translations/it_IT/core-api/symbol-namespaces.rst
+++ b/Documentation/translations/it_IT/core-api/symbol-namespaces.rst
@@ -50,9 +50,9 @@ Di conseguenza, nella tabella dei simboli del kernel ci sarà una voce
rappresentata dalla struttura ``kernel_symbol`` che avrà il campo
``namespace`` (spazio dei nomi) impostato. Un simbolo esportato senza uno spazio
dei nomi avrà questo campo impostato a ``NULL``. Non esiste uno spazio dei nomi
-di base. Il programma ``modpost`` e il codice in kernel/module.c usano lo spazio
-dei nomi, rispettivamente, durante la compilazione e durante il caricamento
-di un modulo.
+di base. Il programma ``modpost`` e il codice in kernel/module/main.c usano lo
+spazio dei nomi, rispettivamente, durante la compilazione e durante il
+caricamento di un modulo.
2.2 Usare il simbolo di preprocessore DEFAULT_SYMBOL_NAMESPACE
==============================================================
diff --git a/Documentation/translations/zh_CN/core-api/kernel-api.rst b/Documentation/translations/zh_CN/core-api/kernel-api.rst
index e45fe80d1cd8..962d31d019d7 100644
--- a/Documentation/translations/zh_CN/core-api/kernel-api.rst
+++ b/Documentation/translations/zh_CN/core-api/kernel-api.rst
@@ -224,7 +224,7 @@ kernel/kmod.c
模块接口支持
------------
-更多信息请参考文件kernel/module.c。
+更多信息请参阅kernel/module/目录下的文件。
硬件接口
========
diff --git a/Documentation/translations/zh_CN/core-api/symbol-namespaces.rst b/Documentation/translations/zh_CN/core-api/symbol-namespaces.rst
index 6abf7ed534ca..bb16f0611046 100644
--- a/Documentation/translations/zh_CN/core-api/symbol-namespaces.rst
+++ b/Documentation/translations/zh_CN/core-api/symbol-namespaces.rst
@@ -52,7 +52,7 @@
相应的 ksymtab 条目结构体 ``kernel_symbol`` 将有相应的成员 ``命名空间`` 集。
导出时未指明命名空间的符号将指向 ``NULL`` 。如果没有定义命名空间,则默认没有。
-``modpost`` 和kernel/module.c分别在构建时或模块加载时使用名称空间。
+``modpost`` 和kernel/module/main.c分别在构建时或模块加载时使用名称空间。
2.2 使用DEFAULT_SYMBOL_NAMESPACE定义
====================================
diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h
index f1d0a7807cd0..41536feb4392 100644
--- a/arch/arm/include/asm/domain.h
+++ b/arch/arm/include/asm/domain.h
@@ -112,19 +112,6 @@ static __always_inline void set_domain(unsigned int val)
}
#endif
-#ifdef CONFIG_CPU_USE_DOMAINS
-#define modify_domain(dom,type) \
- do { \
- unsigned int domain = get_domain(); \
- domain &= ~domain_mask(dom); \
- domain = domain | domain_val(dom, type); \
- set_domain(domain); \
- } while (0)
-
-#else
-static inline void modify_domain(unsigned dom, unsigned type) { }
-#endif
-
/*
* Generate the T (user) versions of the LDR/STR and related
* instructions (inline assembly)
diff --git a/arch/arm/include/asm/mach/map.h b/arch/arm/include/asm/mach/map.h
index 92282558caf7..2b8970d8e5a2 100644
--- a/arch/arm/include/asm/mach/map.h
+++ b/arch/arm/include/asm/mach/map.h
@@ -27,6 +27,7 @@ enum {
MT_HIGH_VECTORS,
MT_MEMORY_RWX,
MT_MEMORY_RW,
+ MT_MEMORY_RO,
MT_ROM,
MT_MEMORY_RWX_NONCACHED,
MT_MEMORY_RW_DTCM,
diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h
index 93051e2f402c..1408a6a15d0e 100644
--- a/arch/arm/include/asm/ptrace.h
+++ b/arch/arm/include/asm/ptrace.h
@@ -163,5 +163,31 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs)
((current_stack_pointer | (THREAD_SIZE - 1)) - 7) - 1; \
})
+
+/*
+ * Update ITSTATE after normal execution of an IT block instruction.
+ *
+ * The 8 IT state bits are split into two parts in CPSR:
+ * ITSTATE<1:0> are in CPSR<26:25>
+ * ITSTATE<7:2> are in CPSR<15:10>
+ */
+static inline unsigned long it_advance(unsigned long cpsr)
+{
+ if ((cpsr & 0x06000400) == 0) {
+ /* ITSTATE<2:0> == 0 means end of IT block, so clear IT state */
+ cpsr &= ~PSR_IT_MASK;
+ } else {
+ /* We need to shift left ITSTATE<4:0> */
+ const unsigned long mask = 0x06001c00; /* Mask ITSTATE<4:0> */
+ unsigned long it = cpsr & mask;
+ it <<= 1;
+ it |= it >> (27 - 10); /* Carry ITSTATE<2> to correct place */
+ it &= mask;
+ cpsr &= ~mask;
+ cpsr |= it;
+ }
+ return cpsr;
+}
+
#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 7aa3ded4af92..6a447ac67d80 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -302,6 +302,7 @@ local_restart:
b ret_fast_syscall
#endif
ENDPROC(vector_swi)
+ .ltorg
/*
* This is the really slow path. We're going to be doing
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index a3a4589ec73b..fc439c2c16f8 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -631,7 +631,11 @@ config CPU_USE_DOMAINS
bool
help
This option enables or disables the use of domain switching
- via the set_fs() function.
+ using the DACR (domain access control register) to protect memory
+ domains from each other. In Linux we use three domains: kernel, user
+ and IO. The domains are used to protect userspace from kernelspace
+ and to handle IO-space as a special type of memory by assigning
+ manager or client roles to running code (such as a process).
config CPU_V7M_NUM_IRQ
int "Number of external interrupts connected to the NVIC"
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index 6f499559d193..f8dd0b3cc8e0 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -935,6 +935,9 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
if (type == TYPE_LDST)
do_alignment_finish_ldst(addr, instr, regs, offset);
+ if (thumb_mode(regs))
+ regs->ARM_cpsr = it_advance(regs->ARM_cpsr);
+
return 0;
bad_or_fault:
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 5e2be37a198e..cd17e324aa51 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -296,6 +296,13 @@ static struct mem_type mem_types[] __ro_after_init = {
.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
.domain = DOMAIN_KERNEL,
},
+ [MT_MEMORY_RO] = {
+ .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
+ L_PTE_XN | L_PTE_RDONLY,
+ .prot_l1 = PMD_TYPE_TABLE,
+ .prot_sect = PMD_TYPE_SECT,
+ .domain = DOMAIN_KERNEL,
+ },
[MT_ROM] = {
.prot_sect = PMD_TYPE_SECT,
.domain = DOMAIN_KERNEL,
@@ -489,6 +496,7 @@ static void __init build_mem_type_table(void)
/* Also setup NX memory mapping */
mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_XN;
+ mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_XN;
}
if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
/*
@@ -568,6 +576,7 @@ static void __init build_mem_type_table(void)
mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
+ mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
#endif
/*
@@ -587,6 +596,8 @@ static void __init build_mem_type_table(void)
mem_types[MT_MEMORY_RWX].prot_pte |= L_PTE_SHARED;
mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_S;
mem_types[MT_MEMORY_RW].prot_pte |= L_PTE_SHARED;
+ mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_S;
+ mem_types[MT_MEMORY_RO].prot_pte |= L_PTE_SHARED;
mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED;
mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_S;
mem_types[MT_MEMORY_RWX_NONCACHED].prot_pte |= L_PTE_SHARED;
@@ -647,6 +658,8 @@ static void __init build_mem_type_table(void)
mem_types[MT_MEMORY_RWX].prot_pte |= kern_pgprot;
mem_types[MT_MEMORY_RW].prot_sect |= ecc_mask | cp->pmd;
mem_types[MT_MEMORY_RW].prot_pte |= kern_pgprot;
+ mem_types[MT_MEMORY_RO].prot_sect |= ecc_mask | cp->pmd;
+ mem_types[MT_MEMORY_RO].prot_pte |= kern_pgprot;
mem_types[MT_MEMORY_DMA_READY].prot_pte |= kern_pgprot;
mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= ecc_mask;
mem_types[MT_ROM].prot_sect |= cp->pmd;
@@ -1360,7 +1373,7 @@ static void __init devicemaps_init(const struct machine_desc *mdesc)
map.pfn = __phys_to_pfn(__atags_pointer & SECTION_MASK);
map.virtual = FDT_FIXED_BASE;
map.length = FDT_FIXED_SIZE;
- map.type = MT_ROM;
+ map.type = MT_MEMORY_RO;
create_mapping(&map);
}
diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c
index fb9f3eb6bf48..8bc7a2d6d6c7 100644
--- a/arch/arm/mm/proc-v7-bugs.c
+++ b/arch/arm/mm/proc-v7-bugs.c
@@ -108,8 +108,7 @@ static unsigned int spectre_v2_install_workaround(unsigned int method)
#else
static unsigned int spectre_v2_install_workaround(unsigned int method)
{
- pr_info("CPU%u: Spectre V2: workarounds disabled by configuration\n",
- smp_processor_id());
+ pr_info_once("Spectre V2: workarounds disabled by configuration\n");
return SPECTRE_VULNERABLE;
}
@@ -209,10 +208,10 @@ static int spectre_bhb_install_workaround(int method)
return SPECTRE_VULNERABLE;
spectre_bhb_method = method;
- }
- pr_info("CPU%u: Spectre BHB: using %s workaround\n",
- smp_processor_id(), spectre_bhb_method_name(method));
+ pr_info("CPU%u: Spectre BHB: enabling %s workaround for all CPUs\n",
+ smp_processor_id(), spectre_bhb_method_name(method));
+ }
return SPECTRE_MITIGATED;
}
diff --git a/arch/arm/probes/decode.h b/arch/arm/probes/decode.h
index 973173598992..facc889d05ee 100644
--- a/arch/arm/probes/decode.h
+++ b/arch/arm/probes/decode.h
@@ -14,6 +14,7 @@
#include <linux/types.h>
#include <linux/stddef.h>
#include <asm/probes.h>
+#include <asm/ptrace.h>
#include <asm/kprobes.h>
void __init arm_probes_decode_init(void);
@@ -35,31 +36,6 @@ void __init find_str_pc_offset(void);
#endif
-/*
- * Update ITSTATE after normal execution of an IT block instruction.
- *
- * The 8 IT state bits are split into two parts in CPSR:
- * ITSTATE<1:0> are in CPSR<26:25>
- * ITSTATE<7:2> are in CPSR<15:10>
- */
-static inline unsigned long it_advance(unsigned long cpsr)
- {
- if ((cpsr & 0x06000400) == 0) {
- /* ITSTATE<2:0> == 0 means end of IT block, so clear IT state */
- cpsr &= ~PSR_IT_MASK;
- } else {
- /* We need to shift left ITSTATE<4:0> */
- const unsigned long mask = 0x06001c00; /* Mask ITSTATE<4:0> */
- unsigned long it = cpsr & mask;
- it <<= 1;
- it |= it >> (27 - 10); /* Carry ITSTATE<2> to correct place */
- it &= mask;
- cpsr &= ~mask;
- cpsr |= it;
- }
- return cpsr;
-}
-
static inline void __kprobes bx_write_pc(long pcv, struct pt_regs *regs)
{
long cpsr = regs->ARM_cpsr;
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 32ffef9f6e5b..fcbb81feb7ad 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -38,7 +38,7 @@ config RISCV
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU
select ARCH_SUPPORTS_HUGETLBFS if MMU
- select ARCH_SUPPORTS_PAGE_TABLE_CHECK
+ select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU
select ARCH_USE_MEMTEST
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h
index cf9a3ec32406..fba90e670ed4 100644
--- a/arch/sh/include/asm/io.h
+++ b/arch/sh/include/asm/io.h
@@ -271,8 +271,12 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size,
#endif /* CONFIG_HAVE_IOREMAP_PROT */
#else /* CONFIG_MMU */
-#define iounmap(addr) do { } while (0)
-#define ioremap(offset, size) ((void __iomem *)(unsigned long)(offset))
+static inline void __iomem *ioremap(phys_addr_t offset, size_t size)
+{
+ return (void __iomem *)(unsigned long)offset;
+}
+
+static inline void iounmap(volatile void __iomem *addr) { }
#endif /* CONFIG_MMU */
#define ioremap_uc ioremap
diff --git a/arch/um/include/asm/page.h b/arch/um/include/asm/page.h
index 95af12e82a32..cdbd9653aa14 100644
--- a/arch/um/include/asm/page.h
+++ b/arch/um/include/asm/page.h
@@ -102,8 +102,8 @@ extern unsigned long uml_physmem;
* casting is the right thing, but 32-bit UML can't have 64-bit virtual
* addresses
*/
-#define __pa(virt) to_phys((void *) (unsigned long) (virt))
-#define __va(phys) to_virt((unsigned long) (phys))
+#define __pa(virt) uml_to_phys((void *) (unsigned long) (virt))
+#define __va(phys) uml_to_virt((unsigned long) (phys))
#define phys_to_pfn(p) ((p) >> PAGE_SHIFT)
#define pfn_to_phys(pfn) PFN_PHYS(pfn)
diff --git a/arch/um/include/shared/mem.h b/arch/um/include/shared/mem.h
index 4862c91d4213..98aacd544108 100644
--- a/arch/um/include/shared/mem.h
+++ b/arch/um/include/shared/mem.h
@@ -9,12 +9,12 @@
extern int phys_mapping(unsigned long phys, unsigned long long *offset_out);
extern unsigned long uml_physmem;
-static inline unsigned long to_phys(void *virt)
+static inline unsigned long uml_to_phys(void *virt)
{
return(((unsigned long) virt) - uml_physmem);
}
-static inline void *to_virt(unsigned long phys)
+static inline void *uml_to_virt(unsigned long phys)
{
return((void *) uml_physmem + phys);
}
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 87d3129e7362..c316c993a949 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -251,7 +251,7 @@ static int userspace_tramp(void *stack)
signal(SIGTERM, SIG_DFL);
signal(SIGWINCH, SIG_IGN);
- fd = phys_mapping(to_phys(__syscall_stub_start), &offset);
+ fd = phys_mapping(uml_to_phys(__syscall_stub_start), &offset);
addr = mmap64((void *) STUB_CODE, UM_KERN_PAGE_SIZE,
PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset);
if (addr == MAP_FAILED) {
@@ -261,7 +261,7 @@ static int userspace_tramp(void *stack)
}
if (stack != NULL) {
- fd = phys_mapping(to_phys(stack), &offset);
+ fd = phys_mapping(uml_to_phys(stack), &offset);
addr = mmap((void *) STUB_DATA,
UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE,
MAP_FIXED | MAP_SHARED, fd, offset);
@@ -534,7 +534,7 @@ int copy_context_skas0(unsigned long new_stack, int pid)
struct stub_data *data = (struct stub_data *) current_stack;
struct stub_data *child_data = (struct stub_data *) new_stack;
unsigned long long new_offset;
- int new_fd = phys_mapping(to_phys((void *)new_stack), &new_offset);
+ int new_fd = phys_mapping(uml_to_phys((void *)new_stack), &new_offset);
/*
* prepare offset and fd of child's stack as argument for parent's
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index be0b95e51df6..e58798f636d4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -462,29 +462,6 @@ config GOLDFISH
def_bool y
depends on X86_GOLDFISH
-config RETPOLINE
- bool "Avoid speculative indirect branches in kernel"
- select OBJTOOL if HAVE_OBJTOOL
- default y
- help
- Compile kernel with the retpoline compiler options to guard against
- kernel-to-user data leaks by avoiding speculative indirect
- branches. Requires a compiler with -mindirect-branch=thunk-extern
- support for full protection. The kernel may run slower.
-
-config CC_HAS_SLS
- def_bool $(cc-option,-mharden-sls=all)
-
-config SLS
- bool "Mitigate Straight-Line-Speculation"
- depends on CC_HAS_SLS && X86_64
- select OBJTOOL if HAVE_OBJTOOL
- default n
- help
- Compile the kernel with straight-line-speculation options to guard
- against straight line speculation. The kernel image might be slightly
- larger.
-
config X86_CPU_RESCTRL
bool "x86 CPU resource control support"
depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD)
@@ -2453,6 +2430,91 @@ source "kernel/livepatch/Kconfig"
endmenu
+config CC_HAS_SLS
+ def_bool $(cc-option,-mharden-sls=all)
+
+config CC_HAS_RETURN_THUNK
+ def_bool $(cc-option,-mfunction-return=thunk-extern)
+
+menuconfig SPECULATION_MITIGATIONS
+ bool "Mitigations for speculative execution vulnerabilities"
+ default y
+ help
+ Say Y here to enable options which enable mitigations for
+ speculative execution hardware vulnerabilities.
+
+ If you say N, all mitigations will be disabled. You really
+ should know what you are doing to say so.
+
+if SPECULATION_MITIGATIONS
+
+config PAGE_TABLE_ISOLATION
+ bool "Remove the kernel mapping in user mode"
+ default y
+ depends on (X86_64 || X86_PAE)
+ help
+ This feature reduces the number of hardware side channels by
+ ensuring that the majority of kernel addresses are not mapped
+ into userspace.
+
+ See Documentation/x86/pti.rst for more details.
+
+config RETPOLINE
+ bool "Avoid speculative indirect branches in kernel"
+ select OBJTOOL if HAVE_OBJTOOL
+ default y
+ help
+ Compile kernel with the retpoline compiler options to guard against
+ kernel-to-user data leaks by avoiding speculative indirect
+ branches. Requires a compiler with -mindirect-branch=thunk-extern
+ support for full protection. The kernel may run slower.
+
+config RETHUNK
+ bool "Enable return-thunks"
+ depends on RETPOLINE && CC_HAS_RETURN_THUNK
+ select OBJTOOL if HAVE_OBJTOOL
+ default y
+ help
+ Compile the kernel with the return-thunks compiler option to guard
+ against kernel-to-user data leaks by avoiding return speculation.
+ Requires a compiler with -mfunction-return=thunk-extern
+ support for full protection. The kernel may run slower.
+
+config CPU_UNRET_ENTRY
+ bool "Enable UNRET on kernel entry"
+ depends on CPU_SUP_AMD && RETHUNK
+ default y
+ help
+ Compile the kernel with support for the retbleed=unret mitigation.
+
+config CPU_IBPB_ENTRY
+ bool "Enable IBPB on kernel entry"
+ depends on CPU_SUP_AMD
+ default y
+ help
+ Compile the kernel with support for the retbleed=ibpb mitigation.
+
+config CPU_IBRS_ENTRY
+ bool "Enable IBRS on kernel entry"
+ depends on CPU_SUP_INTEL
+ default y
+ help
+ Compile the kernel with support for the spectre_v2=ibrs mitigation.
+ This mitigates both spectre_v2 and retbleed at great cost to
+ performance.
+
+config SLS
+ bool "Mitigate Straight-Line-Speculation"
+ depends on CC_HAS_SLS && X86_64
+ select OBJTOOL if HAVE_OBJTOOL
+ default n
+ help
+ Compile the kernel with straight-line-speculation options to guard
+ against straight line speculation. The kernel image might be slightly
+ larger.
+
+endif
+
config ARCH_HAS_ADD_PAGES
def_bool y
depends on ARCH_ENABLE_MEMORY_HOTPLUG
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index a74886aed349..1f40dad30d50 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -21,6 +21,12 @@ ifdef CONFIG_CC_IS_CLANG
RETPOLINE_CFLAGS := -mretpoline-external-thunk
RETPOLINE_VDSO_CFLAGS := -mretpoline
endif
+
+ifdef CONFIG_RETHUNK
+RETHUNK_CFLAGS := -mfunction-return=thunk-extern
+RETPOLINE_CFLAGS += $(RETHUNK_CFLAGS)
+endif
+
export RETPOLINE_CFLAGS
export RETPOLINE_VDSO_CFLAGS
diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
index 7fec5dcf6438..eeadbd7d92cc 100644
--- a/arch/x86/entry/Makefile
+++ b/arch/x86/entry/Makefile
@@ -11,7 +11,7 @@ CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTRACE)
CFLAGS_common.o += -fno-stack-protector
-obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
+obj-y := entry.o entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
obj-y += common.o
obj-y += vdso/
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 29b36e9e4e74..f6907627172b 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -7,6 +7,8 @@
#include <asm/asm-offsets.h>
#include <asm/processor-flags.h>
#include <asm/ptrace-abi.h>
+#include <asm/msr.h>
+#include <asm/nospec-branch.h>
/*
@@ -283,6 +285,66 @@ For 32-bit we have the following conventions - kernel is built with
#endif
/*
+ * IBRS kernel mitigation for Spectre_v2.
+ *
+ * Assumes full context is established (PUSH_REGS, CR3 and GS) and it clobbers
+ * the regs it uses (AX, CX, DX). Must be called before the first RET
+ * instruction (NOTE! UNTRAIN_RET includes a RET instruction)
+ *
+ * The optional argument is used to save/restore the current value,
+ * which is used on the paranoid paths.
+ *
+ * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set.
+ */
+.macro IBRS_ENTER save_reg
+#ifdef CONFIG_CPU_IBRS_ENTRY
+ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
+ movl $MSR_IA32_SPEC_CTRL, %ecx
+
+.ifnb \save_reg
+ rdmsr
+ shl $32, %rdx
+ or %rdx, %rax
+ mov %rax, \save_reg
+ test $SPEC_CTRL_IBRS, %eax
+ jz .Ldo_wrmsr_\@
+ lfence
+ jmp .Lend_\@
+.Ldo_wrmsr_\@:
+.endif
+
+ movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx
+ movl %edx, %eax
+ shr $32, %rdx
+ wrmsr
+.Lend_\@:
+#endif
+.endm
+
+/*
+ * Similar to IBRS_ENTER, requires KERNEL GS,CR3 and clobbers (AX, CX, DX)
+ * regs. Must be called after the last RET.
+ */
+.macro IBRS_EXIT save_reg
+#ifdef CONFIG_CPU_IBRS_ENTRY
+ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
+ movl $MSR_IA32_SPEC_CTRL, %ecx
+
+.ifnb \save_reg
+ mov \save_reg, %rdx
+.else
+ movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx
+ andl $(~SPEC_CTRL_IBRS), %edx
+.endif
+
+ movl %edx, %eax
+ shr $32, %rdx
+ wrmsr
+.Lend_\@:
+#endif
+.endm
+
+/*
* Mitigate Spectre v1 for conditional swapgs code paths.
*
* FENCE_SWAPGS_USER_ENTRY is used in the user entry swapgs code path, to
diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S
new file mode 100644
index 000000000000..bfb7bcb362bc
--- /dev/null
+++ b/arch/x86/entry/entry.S
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Common place for both 32- and 64-bit entry routines.
+ */
+
+#include <linux/linkage.h>
+#include <asm/export.h>
+#include <asm/msr-index.h>
+
+.pushsection .noinstr.text, "ax"
+
+SYM_FUNC_START(entry_ibpb)
+ movl $MSR_IA32_PRED_CMD, %ecx
+ movl $PRED_CMD_IBPB, %eax
+ xorl %edx, %edx
+ wrmsr
+ RET
+SYM_FUNC_END(entry_ibpb)
+/* For KVM */
+EXPORT_SYMBOL_GPL(entry_ibpb);
+
+.popsection
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 887420844066..e309e7156038 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -698,7 +698,6 @@ SYM_CODE_START(__switch_to_asm)
movl %ebx, PER_CPU_VAR(__stack_chk_guard)
#endif
-#ifdef CONFIG_RETPOLINE
/*
* When switching from a shallower to a deeper call stack
* the RSB may either underflow or use entries populated
@@ -707,7 +706,6 @@ SYM_CODE_START(__switch_to_asm)
* speculative execution to prevent attack.
*/
FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
-#endif
/* Restore flags or the incoming task to restore AC state. */
popfl
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 4300ba49b5ee..285e043a3e40 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -85,7 +85,7 @@
*/
SYM_CODE_START(entry_SYSCALL_64)
- UNWIND_HINT_EMPTY
+ UNWIND_HINT_ENTRY
ENDBR
swapgs
@@ -112,6 +112,11 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
movq %rsp, %rdi
/* Sign extend the lower 32bit as syscall numbers are treated as int */
movslq %eax, %rsi
+
+ /* clobbers %rax, make sure it is after saving the syscall nr */
+ IBRS_ENTER
+ UNTRAIN_RET
+
call do_syscall_64 /* returns with IRQs disabled */
/*
@@ -191,6 +196,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
* perf profiles. Nothing jumps here.
*/
syscall_return_via_sysret:
+ IBRS_EXIT
POP_REGS pop_rdi=0
/*
@@ -249,7 +255,6 @@ SYM_FUNC_START(__switch_to_asm)
movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset
#endif
-#ifdef CONFIG_RETPOLINE
/*
* When switching from a shallower to a deeper call stack
* the RSB may either underflow or use entries populated
@@ -258,7 +263,6 @@ SYM_FUNC_START(__switch_to_asm)
* speculative execution to prevent attack.
*/
FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
-#endif
/* restore callee-saved registers */
popq %r15
@@ -322,13 +326,13 @@ SYM_CODE_END(ret_from_fork)
#endif
.endm
-/* Save all registers in pt_regs */
-SYM_CODE_START_LOCAL(push_and_clear_regs)
+SYM_CODE_START_LOCAL(xen_error_entry)
UNWIND_HINT_FUNC
PUSH_AND_CLEAR_REGS save_ret=1
ENCODE_FRAME_POINTER 8
+ UNTRAIN_RET
RET
-SYM_CODE_END(push_and_clear_regs)
+SYM_CODE_END(xen_error_entry)
/**
* idtentry_body - Macro to emit code calling the C function
@@ -337,9 +341,6 @@ SYM_CODE_END(push_and_clear_regs)
*/
.macro idtentry_body cfunc has_error_code:req
- call push_and_clear_regs
- UNWIND_HINT_REGS
-
/*
* Call error_entry() and switch to the task stack if from userspace.
*
@@ -349,7 +350,7 @@ SYM_CODE_END(push_and_clear_regs)
* switch the CR3. So it can skip invoking error_entry().
*/
ALTERNATIVE "call error_entry; movq %rax, %rsp", \
- "", X86_FEATURE_XENPV
+ "call xen_error_entry", X86_FEATURE_XENPV
ENCODE_FRAME_POINTER
UNWIND_HINT_REGS
@@ -612,6 +613,7 @@ __irqentry_text_end:
SYM_CODE_START_LOCAL(common_interrupt_return)
SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
+ IBRS_EXIT
#ifdef CONFIG_DEBUG_ENTRY
/* Assert that pt_regs indicates user mode. */
testb $3, CS(%rsp)
@@ -725,6 +727,7 @@ native_irq_return_ldt:
pushq %rdi /* Stash user RDI */
swapgs /* to kernel GS */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi /* to kernel CR3 */
+ UNTRAIN_RET
movq PER_CPU_VAR(espfix_waddr), %rdi
movq %rax, (0*8)(%rdi) /* user RAX */
@@ -897,6 +900,9 @@ SYM_CODE_END(xen_failsafe_callback)
* 1 -> no SWAPGS on exit
*
* Y GSBASE value at entry, must be restored in paranoid_exit
+ *
+ * R14 - old CR3
+ * R15 - old SPEC_CTRL
*/
SYM_CODE_START_LOCAL(paranoid_entry)
UNWIND_HINT_FUNC
@@ -940,7 +946,7 @@ SYM_CODE_START_LOCAL(paranoid_entry)
* is needed here.
*/
SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx
- RET
+ jmp .Lparanoid_gsbase_done
.Lparanoid_entry_checkgs:
/* EBX = 1 -> kernel GSBASE active, no restore required */
@@ -959,8 +965,16 @@ SYM_CODE_START_LOCAL(paranoid_entry)
xorl %ebx, %ebx
swapgs
.Lparanoid_kernel_gsbase:
-
FENCE_SWAPGS_KERNEL_ENTRY
+.Lparanoid_gsbase_done:
+
+ /*
+ * Once we have CR3 and %GS setup save and set SPEC_CTRL. Just like
+ * CR3 above, keep the old value in a callee saved register.
+ */
+ IBRS_ENTER save_reg=%r15
+ UNTRAIN_RET
+
RET
SYM_CODE_END(paranoid_entry)
@@ -982,9 +996,19 @@ SYM_CODE_END(paranoid_entry)
* 1 -> no SWAPGS on exit
*
* Y User space GSBASE, must be restored unconditionally
+ *
+ * R14 - old CR3
+ * R15 - old SPEC_CTRL
*/
SYM_CODE_START_LOCAL(paranoid_exit)
UNWIND_HINT_REGS
+
+ /*
+ * Must restore IBRS state before both CR3 and %GS since we need access
+ * to the per-CPU x86_spec_ctrl_shadow variable.
+ */
+ IBRS_EXIT save_reg=%r15
+
/*
* The order of operations is important. RESTORE_CR3 requires
* kernel GSBASE.
@@ -1017,6 +1041,10 @@ SYM_CODE_END(paranoid_exit)
*/
SYM_CODE_START_LOCAL(error_entry)
UNWIND_HINT_FUNC
+
+ PUSH_AND_CLEAR_REGS save_ret=1
+ ENCODE_FRAME_POINTER 8
+
testb $3, CS+8(%rsp)
jz .Lerror_kernelspace
@@ -1028,9 +1056,12 @@ SYM_CODE_START_LOCAL(error_entry)
FENCE_SWAPGS_USER_ENTRY
/* We have user CR3. Change to kernel CR3. */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+ IBRS_ENTER
+ UNTRAIN_RET
leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */
.Lerror_entry_from_usermode_after_swapgs:
+
/* Put us onto the real thread stack. */
call sync_regs
RET
@@ -1065,6 +1096,7 @@ SYM_CODE_START_LOCAL(error_entry)
.Lerror_entry_done_lfence:
FENCE_SWAPGS_KERNEL_ENTRY
leaq 8(%rsp), %rax /* return pt_regs pointer */
+ ANNOTATE_UNRET_END
RET
.Lbstep_iret:
@@ -1080,6 +1112,8 @@ SYM_CODE_START_LOCAL(error_entry)
swapgs
FENCE_SWAPGS_USER_ENTRY
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+ IBRS_ENTER
+ UNTRAIN_RET
/*
* Pretend that the exception came from user mode: set up pt_regs
@@ -1185,6 +1219,9 @@ SYM_CODE_START(asm_exc_nmi)
PUSH_AND_CLEAR_REGS rdx=(%rdx)
ENCODE_FRAME_POINTER
+ IBRS_ENTER
+ UNTRAIN_RET
+
/*
* At this point we no longer need to worry about stack damage
* due to nesting -- we're on the normal thread stack and we're
@@ -1409,6 +1446,9 @@ end_repeat_nmi:
movq $-1, %rsi
call exc_nmi
+ /* Always restore stashed SPEC_CTRL value (see paranoid_entry) */
+ IBRS_EXIT save_reg=%r15
+
/* Always restore stashed CR3 value (see paranoid_entry) */
RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index d1052742ad0c..682338e7e2a3 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -4,7 +4,6 @@
*
* Copyright 2000-2002 Andi Kleen, SuSE Labs.
*/
-#include "calling.h"
#include <asm/asm-offsets.h>
#include <asm/current.h>
#include <asm/errno.h>
@@ -14,9 +13,12 @@
#include <asm/irqflags.h>
#include <asm/asm.h>
#include <asm/smap.h>
+#include <asm/nospec-branch.h>
#include <linux/linkage.h>
#include <linux/err.h>
+#include "calling.h"
+
.section .entry.text, "ax"
/*
@@ -47,7 +49,7 @@
* 0(%ebp) arg6
*/
SYM_CODE_START(entry_SYSENTER_compat)
- UNWIND_HINT_EMPTY
+ UNWIND_HINT_ENTRY
ENDBR
/* Interrupts are off on entry. */
swapgs
@@ -88,6 +90,9 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
cld
+ IBRS_ENTER
+ UNTRAIN_RET
+
/*
* SYSENTER doesn't filter flags, so we need to clear NT and AC
* ourselves. To save a few cycles, we can check whether
@@ -174,7 +179,7 @@ SYM_CODE_END(entry_SYSENTER_compat)
* 0(%esp) arg6
*/
SYM_CODE_START(entry_SYSCALL_compat)
- UNWIND_HINT_EMPTY
+ UNWIND_HINT_ENTRY
ENDBR
/* Interrupts are off on entry. */
swapgs
@@ -203,6 +208,9 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
PUSH_AND_CLEAR_REGS rcx=%rbp rax=$-ENOSYS
UNWIND_HINT_REGS
+ IBRS_ENTER
+ UNTRAIN_RET
+
movq %rsp, %rdi
call do_fast_syscall_32
/* XEN PV guests always use IRET path */
@@ -217,6 +225,8 @@ sysret32_from_system_call:
*/
STACKLEAK_ERASE
+ IBRS_EXIT
+
movq RBX(%rsp), %rbx /* pt_regs->rbx */
movq RBP(%rsp), %rbp /* pt_regs->rbp */
movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */
@@ -295,7 +305,7 @@ SYM_CODE_END(entry_SYSCALL_compat)
* ebp arg6
*/
SYM_CODE_START(entry_INT80_compat)
- UNWIND_HINT_EMPTY
+ UNWIND_HINT_ENTRY
ENDBR
/*
* Interrupts are off on entry.
@@ -337,6 +347,9 @@ SYM_CODE_START(entry_INT80_compat)
cld
+ IBRS_ENTER
+ UNTRAIN_RET
+
movq %rsp, %rdi
call do_int80_syscall_32
jmp swapgs_restore_regs_and_return_to_usermode
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index c2a8b76ae0bc..76cd790ed0bd 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -92,6 +92,7 @@ endif
endif
$(vobjs): KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO) $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
+$(vobjs): KBUILD_AFLAGS += -DBUILD_VDSO
#
# vDSO code runs in userspace and -pg doesn't help with profiling anyway.
diff --git a/arch/x86/entry/vsyscall/vsyscall_emu_64.S b/arch/x86/entry/vsyscall/vsyscall_emu_64.S
index 15e35159ebb6..ef2dd1827243 100644
--- a/arch/x86/entry/vsyscall/vsyscall_emu_64.S
+++ b/arch/x86/entry/vsyscall/vsyscall_emu_64.S
@@ -19,17 +19,20 @@ __vsyscall_page:
mov $__NR_gettimeofday, %rax
syscall
- RET
+ ret
+ int3
.balign 1024, 0xcc
mov $__NR_time, %rax
syscall
- RET
+ ret
+ int3
.balign 1024, 0xcc
mov $__NR_getcpu, %rax
syscall
- RET
+ ret
+ int3
.balign 4096, 0xcc
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 9b10c8c76087..9542c582d546 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -76,6 +76,7 @@ extern int alternatives_patched;
extern void alternative_instructions(void);
extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
extern void apply_retpolines(s32 *start, s32 *end);
+extern void apply_returns(s32 *start, s32 *end);
extern void apply_ibt_endbr(s32 *start, s32 *end);
struct module;
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 03acc823838a..00f5227c8459 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -203,8 +203,8 @@
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */
#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
-#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
-#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */
+#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */
+#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */
#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
@@ -296,6 +296,12 @@
#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
#define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */
#define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */
+#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */
+#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */
+#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
+#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */
+#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
@@ -316,6 +322,7 @@
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */
+#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */
#define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
@@ -447,5 +454,6 @@
#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
+#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 36369e76cc63..33d2cd04d254 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -50,6 +50,25 @@
# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
#endif
+#ifdef CONFIG_RETPOLINE
+# define DISABLE_RETPOLINE 0
+#else
+# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \
+ (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)))
+#endif
+
+#ifdef CONFIG_RETHUNK
+# define DISABLE_RETHUNK 0
+#else
+# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31))
+#endif
+
+#ifdef CONFIG_CPU_UNRET_ENTRY
+# define DISABLE_UNRET 0
+#else
+# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31))
+#endif
+
#ifdef CONFIG_INTEL_IOMMU_SVM
# define DISABLE_ENQCMD 0
#else
@@ -82,7 +101,7 @@
#define DISABLED_MASK8 (DISABLE_TDX_GUEST)
#define DISABLED_MASK9 (DISABLE_SGX)
#define DISABLED_MASK10 0
-#define DISABLED_MASK11 0
+#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET)
#define DISABLED_MASK12 0
#define DISABLED_MASK13 0
#define DISABLED_MASK14 0
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index 85865f1645bd..73ca20049835 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -19,19 +19,27 @@
#define __ALIGN_STR __stringify(__ALIGN)
#endif
+#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
+#define RET jmp __x86_return_thunk
+#else /* CONFIG_RETPOLINE */
#ifdef CONFIG_SLS
#define RET ret; int3
#else
#define RET ret
#endif
+#endif /* CONFIG_RETPOLINE */
#else /* __ASSEMBLY__ */
+#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
+#define ASM_RET "jmp __x86_return_thunk\n\t"
+#else /* CONFIG_RETPOLINE */
#ifdef CONFIG_SLS
#define ASM_RET "ret; int3\n\t"
#else
#define ASM_RET "ret\n\t"
#endif
+#endif /* CONFIG_RETPOLINE */
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index d27e0581b777..cc615be27a54 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -51,6 +51,8 @@
#define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */
#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
+#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */
+#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
@@ -93,6 +95,7 @@
#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
#define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */
#define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */
+#define ARCH_CAP_RSBA BIT(2) /* RET may use alternative branch predictors */
#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */
#define ARCH_CAP_SSB_NO BIT(4) /*
* Not susceptible to Speculative Store Bypass
@@ -140,6 +143,13 @@
* bit available to control VERW
* behavior.
*/
+#define ARCH_CAP_RRSBA BIT(19) /*
+ * Indicates RET may use predictors
+ * other than the RSB. With eIBRS
+ * enabled predictions in kernel mode
+ * are restricted to targets in
+ * kernel.
+ */
#define MSR_IA32_FLUSH_CMD 0x0000010b
#define L1D_FLUSH BIT(0) /*
@@ -567,6 +577,9 @@
/* Fam 17h MSRs */
#define MSR_F17H_IRPERF 0xc00000e9
+#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3
+#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1)
+
/* Fam 16h MSRs */
#define MSR_F16H_L2I_PERF_CTL 0xc0010230
#define MSR_F16H_L2I_PERF_CTR 0xc0010231
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index da251a5645b0..bb05ed4f46bd 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -76,6 +76,23 @@
.endm
/*
+ * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions
+ * vs RETBleed validation.
+ */
+#define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE
+
+/*
+ * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should
+ * eventually turn into it's own annotation.
+ */
+.macro ANNOTATE_UNRET_END
+#ifdef CONFIG_DEBUG_ENTRY
+ ANNOTATE_RETPOLINE_SAFE
+ nop
+#endif
+.endm
+
+/*
* JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
* indirect jmp/call which may be susceptible to the Spectre variant 2
* attack.
@@ -105,10 +122,34 @@
* monstrosity above, manually.
*/
.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
-#ifdef CONFIG_RETPOLINE
ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr
__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)
.Lskip_rsb_\@:
+.endm
+
+#ifdef CONFIG_CPU_UNRET_ENTRY
+#define CALL_ZEN_UNTRAIN_RET "call zen_untrain_ret"
+#else
+#define CALL_ZEN_UNTRAIN_RET ""
+#endif
+
+/*
+ * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the
+ * return thunk isn't mapped into the userspace tables (then again, AMD
+ * typically has NO_MELTDOWN).
+ *
+ * While zen_untrain_ret() doesn't clobber anything but requires stack,
+ * entry_ibpb() will clobber AX, CX, DX.
+ *
+ * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point
+ * where we have a stack but before any RET instruction.
+ */
+.macro UNTRAIN_RET
+#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY)
+ ANNOTATE_UNRET_END
+ ALTERNATIVE_2 "", \
+ CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \
+ "call entry_ibpb", X86_FEATURE_ENTRY_IBPB
#endif
.endm
@@ -120,17 +161,20 @@
_ASM_PTR " 999b\n\t" \
".popsection\n\t"
-#ifdef CONFIG_RETPOLINE
-
typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
+extern retpoline_thunk_t __x86_indirect_thunk_array[];
+
+extern void __x86_return_thunk(void);
+extern void zen_untrain_ret(void);
+extern void entry_ibpb(void);
+
+#ifdef CONFIG_RETPOLINE
#define GEN(reg) \
extern retpoline_thunk_t __x86_indirect_thunk_ ## reg;
#include <asm/GEN-for-each-reg.h>
#undef GEN
-extern retpoline_thunk_t __x86_indirect_thunk_array[];
-
#ifdef CONFIG_X86_64
/*
@@ -193,6 +237,7 @@ enum spectre_v2_mitigation {
SPECTRE_V2_EIBRS,
SPECTRE_V2_EIBRS_RETPOLINE,
SPECTRE_V2_EIBRS_LFENCE,
+ SPECTRE_V2_IBRS,
};
/* The indirect branch speculation control variants */
@@ -235,6 +280,9 @@ static inline void indirect_branch_prediction_barrier(void)
/* The Intel SPEC CTRL MSR base value cache */
extern u64 x86_spec_ctrl_base;
+extern u64 x86_spec_ctrl_current;
+extern void write_spec_ctrl_current(u64 val, bool force);
+extern u64 spec_ctrl_current(void);
/*
* With retpoline, we must use IBRS to restrict branch prediction
@@ -244,18 +292,16 @@ extern u64 x86_spec_ctrl_base;
*/
#define firmware_restrict_branch_speculation_start() \
do { \
- u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \
- \
preempt_disable(); \
- alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
+ alternative_msr_write(MSR_IA32_SPEC_CTRL, \
+ spec_ctrl_current() | SPEC_CTRL_IBRS, \
X86_FEATURE_USE_IBRS_FW); \
} while (0)
#define firmware_restrict_branch_speculation_end() \
do { \
- u64 val = x86_spec_ctrl_base; \
- \
- alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
+ alternative_msr_write(MSR_IA32_SPEC_CTRL, \
+ spec_ctrl_current(), \
X86_FEATURE_USE_IBRS_FW); \
preempt_enable(); \
} while (0)
diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h
index 2d8dacd02643..343b722ccaf2 100644
--- a/arch/x86/include/asm/static_call.h
+++ b/arch/x86/include/asm/static_call.h
@@ -21,6 +21,16 @@
* relative displacement across sections.
*/
+/*
+ * The trampoline is 8 bytes and of the general form:
+ *
+ * jmp.d32 \func
+ * ud1 %esp, %ecx
+ *
+ * That trailing #UD provides both a speculation stop and serves as a unique
+ * 3 byte signature identifying static call trampolines. Also see tramp_ud[]
+ * and __static_call_fixup().
+ */
#define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns) \
asm(".pushsection .static_call.text, \"ax\" \n" \
".align 4 \n" \
@@ -28,7 +38,7 @@
STATIC_CALL_TRAMP_STR(name) ": \n" \
ANNOTATE_NOENDBR \
insns " \n" \
- ".byte 0x53, 0x43, 0x54 \n" \
+ ".byte 0x0f, 0xb9, 0xcc \n" \
".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \
".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
".popsection \n")
@@ -36,8 +46,13 @@
#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \
__ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)")
+#ifdef CONFIG_RETHUNK
+#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \
+ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk")
+#else
#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \
__ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop")
+#endif
#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) \
ARCH_DEFINE_STATIC_CALL_TRAMP(name, __static_call_return0)
@@ -48,4 +63,6 @@
".long " STATIC_CALL_KEY_STR(name) " - . \n" \
".popsection \n")
+extern bool __static_call_fixup(void *tramp, u8 op, void *dest);
+
#endif /* _ASM_STATIC_CALL_H */
diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h
index 8b33674288ea..f66fbe6537dd 100644
--- a/arch/x86/include/asm/unwind_hints.h
+++ b/arch/x86/include/asm/unwind_hints.h
@@ -8,7 +8,11 @@
#ifdef __ASSEMBLY__
.macro UNWIND_HINT_EMPTY
- UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_CALL end=1
+ UNWIND_HINT type=UNWIND_HINT_TYPE_CALL end=1
+.endm
+
+.macro UNWIND_HINT_ENTRY
+ UNWIND_HINT type=UNWIND_HINT_TYPE_ENTRY end=1
.endm
.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0
@@ -52,6 +56,14 @@
UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=8 type=UNWIND_HINT_TYPE_FUNC
.endm
+.macro UNWIND_HINT_SAVE
+ UNWIND_HINT type=UNWIND_HINT_TYPE_SAVE
+.endm
+
+.macro UNWIND_HINT_RESTORE
+ UNWIND_HINT type=UNWIND_HINT_TYPE_RESTORE
+.endm
+
#else
#define UNWIND_HINT_FUNC \
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index e257f6c80372..d6858533e6e5 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -115,6 +115,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
}
extern s32 __retpoline_sites[], __retpoline_sites_end[];
+extern s32 __return_sites[], __return_sites_end[];
extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[];
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern s32 __smp_locks[], __smp_locks_end[];
@@ -507,9 +508,76 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
}
}
+#ifdef CONFIG_RETHUNK
+/*
+ * Rewrite the compiler generated return thunk tail-calls.
+ *
+ * For example, convert:
+ *
+ * JMP __x86_return_thunk
+ *
+ * into:
+ *
+ * RET
+ */
+static int patch_return(void *addr, struct insn *insn, u8 *bytes)
+{
+ int i = 0;
+
+ if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
+ return -1;
+
+ bytes[i++] = RET_INSN_OPCODE;
+
+ for (; i < insn->length;)
+ bytes[i++] = INT3_INSN_OPCODE;
+
+ return i;
+}
+
+void __init_or_module noinline apply_returns(s32 *start, s32 *end)
+{
+ s32 *s;
+
+ for (s = start; s < end; s++) {
+ void *dest = NULL, *addr = (void *)s + *s;
+ struct insn insn;
+ int len, ret;
+ u8 bytes[16];
+ u8 op;
+
+ ret = insn_decode_kernel(&insn, addr);
+ if (WARN_ON_ONCE(ret < 0))
+ continue;
+
+ op = insn.opcode.bytes[0];
+ if (op == JMP32_INSN_OPCODE)
+ dest = addr + insn.length + insn.immediate.value;
+
+ if (__static_call_fixup(addr, op, dest) ||
+ WARN_ON_ONCE(dest != &__x86_return_thunk))
+ continue;
+
+ DPRINTK("return thunk at: %pS (%px) len: %d to: %pS",
+ addr, addr, insn.length,
+ addr + insn.length + insn.immediate.value);
+
+ len = patch_return(addr, &insn, bytes);
+ if (len == insn.length) {
+ DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr);
+ DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr);
+ text_poke_early(addr, bytes, len);
+ }
+ }
+}
+#else
+void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
+#endif /* CONFIG_RETHUNK */
+
#else /* !CONFIG_RETPOLINE || !CONFIG_OBJTOOL */
void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { }
+void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
#endif /* CONFIG_RETPOLINE && CONFIG_OBJTOOL */
@@ -860,6 +928,7 @@ void __init alternative_instructions(void)
* those can rewrite the retpoline thunks.
*/
apply_retpolines(__retpoline_sites, __retpoline_sites_end);
+ apply_returns(__return_sites, __return_sites_end);
/*
* Then patch alternatives, such that those paravirt calls that are in
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 437308004ef2..cb50589a7102 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -19,6 +19,7 @@
#include <asm/suspend.h>
#include <asm/tlbflush.h>
#include <asm/tdx.h>
+#include "../kvm/vmx/vmx.h"
#ifdef CONFIG_XEN
#include <xen/interface/xen.h>
@@ -107,4 +108,9 @@ static void __used common(void)
OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
OFFSET(TSS_sp2, tss_struct, x86_tss.sp2);
+
+ if (IS_ENABLED(CONFIG_KVM_INTEL)) {
+ BLANK();
+ OFFSET(VMX_spec_ctrl, vcpu_vmx, spec_ctrl);
+ }
}
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 0c0b09796ced..35d5288394cb 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -862,6 +862,28 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
clear_rdrand_cpuid_bit(c);
}
+void init_spectral_chicken(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_CPU_UNRET_ENTRY
+ u64 value;
+
+ /*
+ * On Zen2 we offer this chicken (bit) on the altar of Speculation.
+ *
+ * This suppresses speculation from the middle of a basic block, i.e. it
+ * suppresses non-branch predictions.
+ *
+ * We use STIBP as a heuristic to filter out Zen2 from the rest of F17H
+ */
+ if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_AMD_STIBP)) {
+ if (!rdmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, &value)) {
+ value |= MSR_ZEN2_SPECTRAL_CHICKEN_BIT;
+ wrmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, value);
+ }
+ }
+#endif
+}
+
static void init_amd_zn(struct cpuinfo_x86 *c)
{
set_cpu_cap(c, X86_FEATURE_ZEN);
@@ -870,12 +892,21 @@ static void init_amd_zn(struct cpuinfo_x86 *c)
node_reclaim_distance = 32;
#endif
- /*
- * Fix erratum 1076: CPB feature bit not being set in CPUID.
- * Always set it, except when running under a hypervisor.
- */
- if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_CPB))
- set_cpu_cap(c, X86_FEATURE_CPB);
+ /* Fix up CPUID bits, but only if not virtualised. */
+ if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) {
+
+ /* Erratum 1076: CPB feature bit not being set in CPUID. */
+ if (!cpu_has(c, X86_FEATURE_CPB))
+ set_cpu_cap(c, X86_FEATURE_CPB);
+
+ /*
+ * Zen3 (Fam19 model < 0x10) parts are not susceptible to
+ * Branch Type Confusion, but predate the allocation of the
+ * BTC_NO bit.
+ */
+ if (c->x86 == 0x19 && !cpu_has(c, X86_FEATURE_BTC_NO))
+ set_cpu_cap(c, X86_FEATURE_BTC_NO);
+ }
}
static void init_amd(struct cpuinfo_x86 *c)
@@ -907,7 +938,8 @@ static void init_amd(struct cpuinfo_x86 *c)
case 0x12: init_amd_ln(c); break;
case 0x15: init_amd_bd(c); break;
case 0x16: init_amd_jg(c); break;
- case 0x17: fallthrough;
+ case 0x17: init_spectral_chicken(c);
+ fallthrough;
case 0x19: init_amd_zn(c); break;
}
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 74c62cc47a5f..0dd04713434b 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -38,6 +38,8 @@
static void __init spectre_v1_select_mitigation(void);
static void __init spectre_v2_select_mitigation(void);
+static void __init retbleed_select_mitigation(void);
+static void __init spectre_v2_user_select_mitigation(void);
static void __init ssb_select_mitigation(void);
static void __init l1tf_select_mitigation(void);
static void __init mds_select_mitigation(void);
@@ -48,16 +50,40 @@ static void __init mmio_select_mitigation(void);
static void __init srbds_select_mitigation(void);
static void __init l1d_flush_select_mitigation(void);
-/* The base value of the SPEC_CTRL MSR that always has to be preserved. */
+/* The base value of the SPEC_CTRL MSR without task-specific bits set */
u64 x86_spec_ctrl_base;
EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
+
+/* The current value of the SPEC_CTRL MSR with task-specific bits set */
+DEFINE_PER_CPU(u64, x86_spec_ctrl_current);
+EXPORT_SYMBOL_GPL(x86_spec_ctrl_current);
+
static DEFINE_MUTEX(spec_ctrl_mutex);
/*
- * The vendor and possibly platform specific bits which can be modified in
- * x86_spec_ctrl_base.
+ * Keep track of the SPEC_CTRL MSR value for the current task, which may differ
+ * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update().
*/
-static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS;
+void write_spec_ctrl_current(u64 val, bool force)
+{
+ if (this_cpu_read(x86_spec_ctrl_current) == val)
+ return;
+
+ this_cpu_write(x86_spec_ctrl_current, val);
+
+ /*
+ * When KERNEL_IBRS this MSR is written on return-to-user, unless
+ * forced the update can be delayed until that time.
+ */
+ if (force || !cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS))
+ wrmsrl(MSR_IA32_SPEC_CTRL, val);
+}
+
+u64 spec_ctrl_current(void)
+{
+ return this_cpu_read(x86_spec_ctrl_current);
+}
+EXPORT_SYMBOL_GPL(spec_ctrl_current);
/*
* AMD specific MSR info for Speculative Store Bypass control.
@@ -114,13 +140,21 @@ void __init check_bugs(void)
if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
- /* Allow STIBP in MSR_SPEC_CTRL if supported */
- if (boot_cpu_has(X86_FEATURE_STIBP))
- x86_spec_ctrl_mask |= SPEC_CTRL_STIBP;
-
/* Select the proper CPU mitigations before patching alternatives: */
spectre_v1_select_mitigation();
spectre_v2_select_mitigation();
+ /*
+ * retbleed_select_mitigation() relies on the state set by
+ * spectre_v2_select_mitigation(); specifically it wants to know about
+ * spectre_v2=ibrs.
+ */
+ retbleed_select_mitigation();
+ /*
+ * spectre_v2_user_select_mitigation() relies on the state set by
+ * retbleed_select_mitigation(); specifically the STIBP selection is
+ * forced for UNRET.
+ */
+ spectre_v2_user_select_mitigation();
ssb_select_mitigation();
l1tf_select_mitigation();
md_clear_select_mitigation();
@@ -161,31 +195,17 @@ void __init check_bugs(void)
#endif
}
+/*
+ * NOTE: This function is *only* called for SVM. VMX spec_ctrl handling is
+ * done in vmenter.S.
+ */
void
x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
{
- u64 msrval, guestval, hostval = x86_spec_ctrl_base;
+ u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current();
struct thread_info *ti = current_thread_info();
- /* Is MSR_SPEC_CTRL implemented ? */
if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) {
- /*
- * Restrict guest_spec_ctrl to supported values. Clear the
- * modifiable bits in the host base value and or the
- * modifiable bits from the guest value.
- */
- guestval = hostval & ~x86_spec_ctrl_mask;
- guestval |= guest_spec_ctrl & x86_spec_ctrl_mask;
-
- /* SSBD controlled in MSR_SPEC_CTRL */
- if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
- static_cpu_has(X86_FEATURE_AMD_SSBD))
- hostval |= ssbd_tif_to_spec_ctrl(ti->flags);
-
- /* Conditional STIBP enabled? */
- if (static_branch_unlikely(&switch_to_cond_stibp))
- hostval |= stibp_tif_to_spec_ctrl(ti->flags);
-
if (hostval != guestval) {
msrval = setguest ? guestval : hostval;
wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
@@ -752,12 +772,180 @@ static int __init nospectre_v1_cmdline(char *str)
}
early_param("nospectre_v1", nospectre_v1_cmdline);
-#undef pr_fmt
-#define pr_fmt(fmt) "Spectre V2 : " fmt
-
static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
SPECTRE_V2_NONE;
+#undef pr_fmt
+#define pr_fmt(fmt) "RETBleed: " fmt
+
+enum retbleed_mitigation {
+ RETBLEED_MITIGATION_NONE,
+ RETBLEED_MITIGATION_UNRET,
+ RETBLEED_MITIGATION_IBPB,
+ RETBLEED_MITIGATION_IBRS,
+ RETBLEED_MITIGATION_EIBRS,
+};
+
+enum retbleed_mitigation_cmd {
+ RETBLEED_CMD_OFF,
+ RETBLEED_CMD_AUTO,
+ RETBLEED_CMD_UNRET,
+ RETBLEED_CMD_IBPB,
+};
+
+const char * const retbleed_strings[] = {
+ [RETBLEED_MITIGATION_NONE] = "Vulnerable",
+ [RETBLEED_MITIGATION_UNRET] = "Mitigation: untrained return thunk",
+ [RETBLEED_MITIGATION_IBPB] = "Mitigation: IBPB",
+ [RETBLEED_MITIGATION_IBRS] = "Mitigation: IBRS",
+ [RETBLEED_MITIGATION_EIBRS] = "Mitigation: Enhanced IBRS",
+};
+
+static enum retbleed_mitigation retbleed_mitigation __ro_after_init =
+ RETBLEED_MITIGATION_NONE;
+static enum retbleed_mitigation_cmd retbleed_cmd __ro_after_init =
+ RETBLEED_CMD_AUTO;
+
+static int __ro_after_init retbleed_nosmt = false;
+
+static int __init retbleed_parse_cmdline(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ while (str) {
+ char *next = strchr(str, ',');
+ if (next) {
+ *next = 0;
+ next++;
+ }
+
+ if (!strcmp(str, "off")) {
+ retbleed_cmd = RETBLEED_CMD_OFF;
+ } else if (!strcmp(str, "auto")) {
+ retbleed_cmd = RETBLEED_CMD_AUTO;
+ } else if (!strcmp(str, "unret")) {
+ retbleed_cmd = RETBLEED_CMD_UNRET;
+ } else if (!strcmp(str, "ibpb")) {
+ retbleed_cmd = RETBLEED_CMD_IBPB;
+ } else if (!strcmp(str, "nosmt")) {
+ retbleed_nosmt = true;
+ } else {
+ pr_err("Ignoring unknown retbleed option (%s).", str);
+ }
+
+ str = next;
+ }
+
+ return 0;
+}
+early_param("retbleed", retbleed_parse_cmdline);
+
+#define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n"
+#define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n"
+
+static void __init retbleed_select_mitigation(void)
+{
+ bool mitigate_smt = false;
+
+ if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off())
+ return;
+
+ switch (retbleed_cmd) {
+ case RETBLEED_CMD_OFF:
+ return;
+
+ case RETBLEED_CMD_UNRET:
+ if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) {
+ retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
+ } else {
+ pr_err("WARNING: kernel not compiled with CPU_UNRET_ENTRY.\n");
+ goto do_cmd_auto;
+ }
+ break;
+
+ case RETBLEED_CMD_IBPB:
+ if (!boot_cpu_has(X86_FEATURE_IBPB)) {
+ pr_err("WARNING: CPU does not support IBPB.\n");
+ goto do_cmd_auto;
+ } else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) {
+ retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
+ } else {
+ pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n");
+ goto do_cmd_auto;
+ }
+ break;
+
+do_cmd_auto:
+ case RETBLEED_CMD_AUTO:
+ default:
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
+ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
+ if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY))
+ retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
+ else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY) && boot_cpu_has(X86_FEATURE_IBPB))
+ retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
+ }
+
+ /*
+ * The Intel mitigation (IBRS or eIBRS) was already selected in
+ * spectre_v2_select_mitigation(). 'retbleed_mitigation' will
+ * be set accordingly below.
+ */
+
+ break;
+ }
+
+ switch (retbleed_mitigation) {
+ case RETBLEED_MITIGATION_UNRET:
+ setup_force_cpu_cap(X86_FEATURE_RETHUNK);
+ setup_force_cpu_cap(X86_FEATURE_UNRET);
+
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
+ boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
+ pr_err(RETBLEED_UNTRAIN_MSG);
+
+ mitigate_smt = true;
+ break;
+
+ case RETBLEED_MITIGATION_IBPB:
+ setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
+ mitigate_smt = true;
+ break;
+
+ default:
+ break;
+ }
+
+ if (mitigate_smt && !boot_cpu_has(X86_FEATURE_STIBP) &&
+ (retbleed_nosmt || cpu_mitigations_auto_nosmt()))
+ cpu_smt_disable(false);
+
+ /*
+ * Let IBRS trump all on Intel without affecting the effects of the
+ * retbleed= cmdline option.
+ */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
+ switch (spectre_v2_enabled) {
+ case SPECTRE_V2_IBRS:
+ retbleed_mitigation = RETBLEED_MITIGATION_IBRS;
+ break;
+ case SPECTRE_V2_EIBRS:
+ case SPECTRE_V2_EIBRS_RETPOLINE:
+ case SPECTRE_V2_EIBRS_LFENCE:
+ retbleed_mitigation = RETBLEED_MITIGATION_EIBRS;
+ break;
+ default:
+ pr_err(RETBLEED_INTEL_MSG);
+ }
+ }
+
+ pr_info("%s\n", retbleed_strings[retbleed_mitigation]);
+}
+
+#undef pr_fmt
+#define pr_fmt(fmt) "Spectre V2 : " fmt
+
static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init =
SPECTRE_V2_USER_NONE;
static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init =
@@ -828,6 +1016,7 @@ enum spectre_v2_mitigation_cmd {
SPECTRE_V2_CMD_EIBRS,
SPECTRE_V2_CMD_EIBRS_RETPOLINE,
SPECTRE_V2_CMD_EIBRS_LFENCE,
+ SPECTRE_V2_CMD_IBRS,
};
enum spectre_v2_user_cmd {
@@ -868,13 +1057,15 @@ static void __init spec_v2_user_print_cond(const char *reason, bool secure)
pr_info("spectre_v2_user=%s forced on command line.\n", reason);
}
+static __ro_after_init enum spectre_v2_mitigation_cmd spectre_v2_cmd;
+
static enum spectre_v2_user_cmd __init
-spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd)
+spectre_v2_parse_user_cmdline(void)
{
char arg[20];
int ret, i;
- switch (v2_cmd) {
+ switch (spectre_v2_cmd) {
case SPECTRE_V2_CMD_NONE:
return SPECTRE_V2_USER_CMD_NONE;
case SPECTRE_V2_CMD_FORCE:
@@ -900,15 +1091,16 @@ spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd)
return SPECTRE_V2_USER_CMD_AUTO;
}
-static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode)
+static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode)
{
- return (mode == SPECTRE_V2_EIBRS ||
- mode == SPECTRE_V2_EIBRS_RETPOLINE ||
- mode == SPECTRE_V2_EIBRS_LFENCE);
+ return mode == SPECTRE_V2_IBRS ||
+ mode == SPECTRE_V2_EIBRS ||
+ mode == SPECTRE_V2_EIBRS_RETPOLINE ||
+ mode == SPECTRE_V2_EIBRS_LFENCE;
}
static void __init
-spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
+spectre_v2_user_select_mitigation(void)
{
enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE;
bool smt_possible = IS_ENABLED(CONFIG_SMP);
@@ -921,7 +1113,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
smt_possible = false;
- cmd = spectre_v2_parse_user_cmdline(v2_cmd);
+ cmd = spectre_v2_parse_user_cmdline();
switch (cmd) {
case SPECTRE_V2_USER_CMD_NONE:
goto set_mode;
@@ -969,12 +1161,12 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
}
/*
- * If no STIBP, enhanced IBRS is enabled or SMT impossible, STIBP is not
- * required.
+ * If no STIBP, IBRS or enhanced IBRS is enabled, or SMT impossible,
+ * STIBP is not required.
*/
if (!boot_cpu_has(X86_FEATURE_STIBP) ||
!smt_possible ||
- spectre_v2_in_eibrs_mode(spectre_v2_enabled))
+ spectre_v2_in_ibrs_mode(spectre_v2_enabled))
return;
/*
@@ -986,6 +1178,13 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON))
mode = SPECTRE_V2_USER_STRICT_PREFERRED;
+ if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) {
+ if (mode != SPECTRE_V2_USER_STRICT &&
+ mode != SPECTRE_V2_USER_STRICT_PREFERRED)
+ pr_info("Selecting STIBP always-on mode to complement retbleed mitigation'\n");
+ mode = SPECTRE_V2_USER_STRICT_PREFERRED;
+ }
+
spectre_v2_user_stibp = mode;
set_mode:
@@ -999,6 +1198,7 @@ static const char * const spectre_v2_strings[] = {
[SPECTRE_V2_EIBRS] = "Mitigation: Enhanced IBRS",
[SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced IBRS + LFENCE",
[SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced IBRS + Retpolines",
+ [SPECTRE_V2_IBRS] = "Mitigation: IBRS",
};
static const struct {
@@ -1016,6 +1216,7 @@ static const struct {
{ "eibrs,lfence", SPECTRE_V2_CMD_EIBRS_LFENCE, false },
{ "eibrs,retpoline", SPECTRE_V2_CMD_EIBRS_RETPOLINE, false },
{ "auto", SPECTRE_V2_CMD_AUTO, false },
+ { "ibrs", SPECTRE_V2_CMD_IBRS, false },
};
static void __init spec_v2_print_cond(const char *reason, bool secure)
@@ -1078,6 +1279,30 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
return SPECTRE_V2_CMD_AUTO;
}
+ if (cmd == SPECTRE_V2_CMD_IBRS && !IS_ENABLED(CONFIG_CPU_IBRS_ENTRY)) {
+ pr_err("%s selected but not compiled in. Switching to AUTO select\n",
+ mitigation_options[i].option);
+ return SPECTRE_V2_CMD_AUTO;
+ }
+
+ if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
+ pr_err("%s selected but not Intel CPU. Switching to AUTO select\n",
+ mitigation_options[i].option);
+ return SPECTRE_V2_CMD_AUTO;
+ }
+
+ if (cmd == SPECTRE_V2_CMD_IBRS && !boot_cpu_has(X86_FEATURE_IBRS)) {
+ pr_err("%s selected but CPU doesn't have IBRS. Switching to AUTO select\n",
+ mitigation_options[i].option);
+ return SPECTRE_V2_CMD_AUTO;
+ }
+
+ if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_has(X86_FEATURE_XENPV)) {
+ pr_err("%s selected but running as XenPV guest. Switching to AUTO select\n",
+ mitigation_options[i].option);
+ return SPECTRE_V2_CMD_AUTO;
+ }
+
spec_v2_print_cond(mitigation_options[i].option,
mitigation_options[i].secure);
return cmd;
@@ -1093,6 +1318,22 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void)
return SPECTRE_V2_RETPOLINE;
}
+/* Disable in-kernel use of non-RSB RET predictors */
+static void __init spec_ctrl_disable_kernel_rrsba(void)
+{
+ u64 ia32_cap;
+
+ if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL))
+ return;
+
+ ia32_cap = x86_read_arch_cap_msr();
+
+ if (ia32_cap & ARCH_CAP_RRSBA) {
+ x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S;
+ write_spec_ctrl_current(x86_spec_ctrl_base, true);
+ }
+}
+
static void __init spectre_v2_select_mitigation(void)
{
enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
@@ -1117,6 +1358,15 @@ static void __init spectre_v2_select_mitigation(void)
break;
}
+ if (IS_ENABLED(CONFIG_CPU_IBRS_ENTRY) &&
+ boot_cpu_has_bug(X86_BUG_RETBLEED) &&
+ retbleed_cmd != RETBLEED_CMD_OFF &&
+ boot_cpu_has(X86_FEATURE_IBRS) &&
+ boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
+ mode = SPECTRE_V2_IBRS;
+ break;
+ }
+
mode = spectre_v2_select_retpoline();
break;
@@ -1133,6 +1383,10 @@ static void __init spectre_v2_select_mitigation(void)
mode = spectre_v2_select_retpoline();
break;
+ case SPECTRE_V2_CMD_IBRS:
+ mode = SPECTRE_V2_IBRS;
+ break;
+
case SPECTRE_V2_CMD_EIBRS:
mode = SPECTRE_V2_EIBRS;
break;
@@ -1149,10 +1403,9 @@ static void __init spectre_v2_select_mitigation(void)
if (mode == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled())
pr_err(SPECTRE_V2_EIBRS_EBPF_MSG);
- if (spectre_v2_in_eibrs_mode(mode)) {
- /* Force it so VMEXIT will restore correctly */
+ if (spectre_v2_in_ibrs_mode(mode)) {
x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+ write_spec_ctrl_current(x86_spec_ctrl_base, true);
}
switch (mode) {
@@ -1160,6 +1413,10 @@ static void __init spectre_v2_select_mitigation(void)
case SPECTRE_V2_EIBRS:
break;
+ case SPECTRE_V2_IBRS:
+ setup_force_cpu_cap(X86_FEATURE_KERNEL_IBRS);
+ break;
+
case SPECTRE_V2_LFENCE:
case SPECTRE_V2_EIBRS_LFENCE:
setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE);
@@ -1171,43 +1428,107 @@ static void __init spectre_v2_select_mitigation(void)
break;
}
+ /*
+ * Disable alternate RSB predictions in kernel when indirect CALLs and
+ * JMPs gets protection against BHI and Intramode-BTI, but RET
+ * prediction from a non-RSB predictor is still a risk.
+ */
+ if (mode == SPECTRE_V2_EIBRS_LFENCE ||
+ mode == SPECTRE_V2_EIBRS_RETPOLINE ||
+ mode == SPECTRE_V2_RETPOLINE)
+ spec_ctrl_disable_kernel_rrsba();
+
spectre_v2_enabled = mode;
pr_info("%s\n", spectre_v2_strings[mode]);
/*
- * If spectre v2 protection has been enabled, unconditionally fill
- * RSB during a context switch; this protects against two independent
- * issues:
+ * If Spectre v2 protection has been enabled, fill the RSB during a
+ * context switch. In general there are two types of RSB attacks
+ * across context switches, for which the CALLs/RETs may be unbalanced.
*
- * - RSB underflow (and switch to BTB) on Skylake+
- * - SpectreRSB variant of spectre v2 on X86_BUG_SPECTRE_V2 CPUs
+ * 1) RSB underflow
+ *
+ * Some Intel parts have "bottomless RSB". When the RSB is empty,
+ * speculated return targets may come from the branch predictor,
+ * which could have a user-poisoned BTB or BHB entry.
+ *
+ * AMD has it even worse: *all* returns are speculated from the BTB,
+ * regardless of the state of the RSB.
+ *
+ * When IBRS or eIBRS is enabled, the "user -> kernel" attack
+ * scenario is mitigated by the IBRS branch prediction isolation
+ * properties, so the RSB buffer filling wouldn't be necessary to
+ * protect against this type of attack.
+ *
+ * The "user -> user" attack scenario is mitigated by RSB filling.
+ *
+ * 2) Poisoned RSB entry
+ *
+ * If the 'next' in-kernel return stack is shorter than 'prev',
+ * 'next' could be tricked into speculating with a user-poisoned RSB
+ * entry.
+ *
+ * The "user -> kernel" attack scenario is mitigated by SMEP and
+ * eIBRS.
+ *
+ * The "user -> user" scenario, also known as SpectreBHB, requires
+ * RSB clearing.
+ *
+ * So to mitigate all cases, unconditionally fill RSB on context
+ * switches.
+ *
+ * FIXME: Is this pointless for retbleed-affected AMD?
*/
setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
/*
- * Retpoline means the kernel is safe because it has no indirect
- * branches. Enhanced IBRS protects firmware too, so, enable restricted
- * speculation around firmware calls only when Enhanced IBRS isn't
- * supported.
+ * Similar to context switches, there are two types of RSB attacks
+ * after vmexit:
+ *
+ * 1) RSB underflow
+ *
+ * 2) Poisoned RSB entry
+ *
+ * When retpoline is enabled, both are mitigated by filling/clearing
+ * the RSB.
+ *
+ * When IBRS is enabled, while #1 would be mitigated by the IBRS branch
+ * prediction isolation protections, RSB still needs to be cleared
+ * because of #2. Note that SMEP provides no protection here, unlike
+ * user-space-poisoned RSB entries.
+ *
+ * eIBRS, on the other hand, has RSB-poisoning protections, so it
+ * doesn't need RSB clearing after vmexit.
+ */
+ if (boot_cpu_has(X86_FEATURE_RETPOLINE) ||
+ boot_cpu_has(X86_FEATURE_KERNEL_IBRS))
+ setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT);
+
+ /*
+ * Retpoline protects the kernel, but doesn't protect firmware. IBRS
+ * and Enhanced IBRS protect firmware too, so enable IBRS around
+ * firmware calls only when IBRS / Enhanced IBRS aren't otherwise
+ * enabled.
*
* Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because
* the user might select retpoline on the kernel command line and if
* the CPU supports Enhanced IBRS, kernel might un-intentionally not
* enable IBRS around firmware calls.
*/
- if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_eibrs_mode(mode)) {
+ if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_ibrs_mode(mode)) {
setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
pr_info("Enabling Restricted Speculation for firmware calls\n");
}
/* Set up IBPB and STIBP depending on the general spectre V2 command */
- spectre_v2_user_select_mitigation(cmd);
+ spectre_v2_cmd = cmd;
}
static void update_stibp_msr(void * __unused)
{
- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+ u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP);
+ write_spec_ctrl_current(val, true);
}
/* Update x86_spec_ctrl_base in case SMT state changed. */
@@ -1424,16 +1745,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
}
/*
- * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper
- * bit in the mask to allow guests to use the mitigation even in the
- * case where the host does not enable it.
- */
- if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
- static_cpu_has(X86_FEATURE_AMD_SSBD)) {
- x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
- }
-
- /*
* We have three CPU feature flags that are in play here:
* - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
* - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass
@@ -1450,7 +1761,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
x86_amd_ssb_disable();
} else {
x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+ write_spec_ctrl_current(x86_spec_ctrl_base, true);
}
}
@@ -1701,7 +2012,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
void x86_spec_ctrl_setup_ap(void)
{
if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+ write_spec_ctrl_current(x86_spec_ctrl_base, true);
if (ssb_mode == SPEC_STORE_BYPASS_DISABLE)
x86_amd_ssb_disable();
@@ -1938,7 +2249,7 @@ static ssize_t mmio_stale_data_show_state(char *buf)
static char *stibp_state(void)
{
- if (spectre_v2_in_eibrs_mode(spectre_v2_enabled))
+ if (spectre_v2_in_ibrs_mode(spectre_v2_enabled))
return "";
switch (spectre_v2_user_stibp) {
@@ -1994,6 +2305,24 @@ static ssize_t srbds_show_state(char *buf)
return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]);
}
+static ssize_t retbleed_show_state(char *buf)
+{
+ if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) {
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
+ boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
+ return sprintf(buf, "Vulnerable: untrained return thunk on non-Zen uarch\n");
+
+ return sprintf(buf, "%s; SMT %s\n",
+ retbleed_strings[retbleed_mitigation],
+ !sched_smt_active() ? "disabled" :
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ?
+ "enabled with STIBP protection" : "vulnerable");
+ }
+
+ return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]);
+}
+
static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
char *buf, unsigned int bug)
{
@@ -2039,6 +2368,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
case X86_BUG_MMIO_STALE_DATA:
return mmio_stale_data_show_state(buf);
+ case X86_BUG_RETBLEED:
+ return retbleed_show_state(buf);
+
default:
break;
}
@@ -2095,4 +2427,9 @@ ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *at
{
return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA);
}
+
+ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_RETBLEED);
+}
#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4730b0a58f24..736262a76a12 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1205,48 +1205,60 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
{}
};
+#define VULNBL(vendor, family, model, blacklist) \
+ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, blacklist)
+
#define VULNBL_INTEL_STEPPINGS(model, steppings, issues) \
X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, \
INTEL_FAM6_##model, steppings, \
X86_FEATURE_ANY, issues)
+#define VULNBL_AMD(family, blacklist) \
+ VULNBL(AMD, family, X86_MODEL_ANY, blacklist)
+
+#define VULNBL_HYGON(family, blacklist) \
+ VULNBL(HYGON, family, X86_MODEL_ANY, blacklist)
+
#define SRBDS BIT(0)
/* CPU is affected by X86_BUG_MMIO_STALE_DATA */
#define MMIO BIT(1)
/* CPU is affected by Shared Buffers Data Sampling (SBDS), a variant of X86_BUG_MMIO_STALE_DATA */
#define MMIO_SBDS BIT(2)
+/* CPU is affected by RETbleed, speculating where you would not expect it */
+#define RETBLEED BIT(3)
static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS),
- VULNBL_INTEL_STEPPINGS(HASWELL_X, BIT(2) | BIT(4), MMIO),
- VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x3, 0x5), MMIO),
+ VULNBL_INTEL_STEPPINGS(HASWELL_X, X86_STEPPING_ANY, MMIO),
+ VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPING_ANY, MMIO),
VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO),
VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS),
- VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO),
- VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS),
- VULNBL_INTEL_STEPPINGS(SKYLAKE_X, BIT(3) | BIT(4) | BIT(6) |
- BIT(7) | BIT(0xB), MMIO),
- VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO),
- VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS),
- VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x9, 0xC), SRBDS | MMIO),
- VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0x8), SRBDS),
- VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x9, 0xD), SRBDS | MMIO),
- VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0x8), SRBDS),
- VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPINGS(0x5, 0x5), MMIO | MMIO_SBDS),
- VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPINGS(0x1, 0x1), MMIO),
- VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x4, 0x6), MMIO),
- VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO | MMIO_SBDS),
- VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS),
- VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO),
- VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS),
- VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPINGS(0x1, 0x1), MMIO),
- VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS),
+ VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO),
+ VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS),
VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO),
- VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO | MMIO_SBDS),
+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS),
+
+ VULNBL_AMD(0x15, RETBLEED),
+ VULNBL_AMD(0x16, RETBLEED),
+ VULNBL_AMD(0x17, RETBLEED),
+ VULNBL_HYGON(0x18, RETBLEED),
{}
};
@@ -1348,6 +1360,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
!arch_cap_mmio_immune(ia32_cap))
setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
+ if (!cpu_has(c, X86_FEATURE_BTC_NO)) {
+ if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))
+ setup_force_cpu_bug(X86_BUG_RETBLEED);
+ }
+
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
return;
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 2a8e584fc991..7c9b5893c30a 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -61,6 +61,8 @@ static inline void tsx_init(void) { }
static inline void tsx_ap_init(void) { }
#endif /* CONFIG_CPU_SUP_INTEL */
+extern void init_spectral_chicken(struct cpuinfo_x86 *c);
+
extern void get_cpu_cap(struct cpuinfo_x86 *c);
extern void get_cpu_address_sizes(struct cpuinfo_x86 *c);
extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c
index 3fcdda4c1e11..21fd425088fe 100644
--- a/arch/x86/kernel/cpu/hygon.c
+++ b/arch/x86/kernel/cpu/hygon.c
@@ -302,6 +302,12 @@ static void init_hygon(struct cpuinfo_x86 *c)
/* get apicid instead of initial apic id from cpuid */
c->apicid = hard_smp_processor_id();
+ /*
+ * XXX someone from Hygon needs to confirm this DTRT
+ *
+ init_spectral_chicken(c);
+ */
+
set_cpu_cap(c, X86_FEATURE_ZEN);
set_cpu_cap(c, X86_FEATURE_CPB);
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index dbaa8326d6f2..fd44b54c90d5 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -27,6 +27,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
{ X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
{ X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 },
+ { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 },
{ X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 },
{ X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 },
{ X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 },
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 5b4efc927d80..24b9fa89aa27 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -301,7 +301,7 @@ union ftrace_op_code_union {
} __attribute__((packed));
};
-#define RET_SIZE 1 + IS_ENABLED(CONFIG_SLS)
+#define RET_SIZE (IS_ENABLED(CONFIG_RETPOLINE) ? 5 : 1 + IS_ENABLED(CONFIG_SLS))
static unsigned long
create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
@@ -357,7 +357,10 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
goto fail;
ip = trampoline + size;
- memcpy(ip, retq, RET_SIZE);
+ if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
+ __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, &__x86_return_thunk, JMP32_INSN_SIZE);
+ else
+ memcpy(ip, retq, sizeof(retq));
/* No need to test direct calls on created trampolines */
if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 92c4afa2b729..d860d437631b 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -389,6 +389,8 @@ SYM_CODE_START_NOALIGN(vc_boot_ghcb)
UNWIND_HINT_IRET_REGS offset=8
ENDBR
+ ANNOTATE_UNRET_END
+
/* Build pt_regs */
PUSH_AND_CLEAR_REGS
@@ -448,6 +450,7 @@ SYM_CODE_END(early_idt_handler_array)
SYM_CODE_START_LOCAL(early_idt_handler_common)
UNWIND_HINT_IRET_REGS offset=16
+ ANNOTATE_UNRET_END
/*
* The stack is the hardware frame, an error code or zero, and the
* vector number.
@@ -497,6 +500,8 @@ SYM_CODE_START_NOALIGN(vc_no_ghcb)
UNWIND_HINT_IRET_REGS offset=8
ENDBR
+ ANNOTATE_UNRET_END
+
/* Build pt_regs */
PUSH_AND_CLEAR_REGS
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index b98ffcf4d250..67828d973389 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -253,7 +253,7 @@ int module_finalize(const Elf_Ehdr *hdr,
{
const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
*para = NULL, *orc = NULL, *orc_ip = NULL,
- *retpolines = NULL, *ibt_endbr = NULL;
+ *retpolines = NULL, *returns = NULL, *ibt_endbr = NULL;
char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
@@ -271,6 +271,8 @@ int module_finalize(const Elf_Ehdr *hdr,
orc_ip = s;
if (!strcmp(".retpoline_sites", secstrings + s->sh_name))
retpolines = s;
+ if (!strcmp(".return_sites", secstrings + s->sh_name))
+ returns = s;
if (!strcmp(".ibt_endbr_seal", secstrings + s->sh_name))
ibt_endbr = s;
}
@@ -287,6 +289,10 @@ int module_finalize(const Elf_Ehdr *hdr,
void *rseg = (void *)retpolines->sh_addr;
apply_retpolines(rseg, rseg + retpolines->sh_size);
}
+ if (returns) {
+ void *rseg = (void *)returns->sh_addr;
+ apply_returns(rseg, rseg + returns->sh_size);
+ }
if (alt) {
/* patch .altinstructions */
void *aseg = (void *)alt->sh_addr;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 9b2772b7e1f3..d456ce21c255 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -600,7 +600,7 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp,
}
if (updmsr)
- wrmsrl(MSR_IA32_SPEC_CTRL, msr);
+ write_spec_ctrl_current(msr, false);
}
static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk)
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index fcc8a7699103..c7c4b1917336 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -7,10 +7,12 @@
#include <linux/linkage.h>
#include <asm/page_types.h>
#include <asm/kexec.h>
+#include <asm/nospec-branch.h>
#include <asm/processor-flags.h>
/*
- * Must be relocatable PIC code callable as a C function
+ * Must be relocatable PIC code callable as a C function, in particular
+ * there must be a plain RET and not jump to return thunk.
*/
#define PTR(x) (x << 2)
@@ -91,7 +93,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
movl %edi, %eax
addl $(identity_mapped - relocate_kernel), %eax
pushl %eax
- RET
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
SYM_CODE_END(relocate_kernel)
SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
@@ -159,12 +163,15 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
xorl %edx, %edx
xorl %esi, %esi
xorl %ebp, %ebp
- RET
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
1:
popl %edx
movl CP_PA_SWAP_PAGE(%edi), %esp
addl $PAGE_SIZE, %esp
2:
+ ANNOTATE_RETPOLINE_SAFE
call *%edx
/* get the re-entry point of the peer system */
@@ -190,7 +197,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
movl %edi, %eax
addl $(virtual_mapped - relocate_kernel), %eax
pushl %eax
- RET
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
SYM_CODE_END(identity_mapped)
SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
@@ -208,7 +217,9 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
popl %edi
popl %esi
popl %ebx
- RET
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
SYM_CODE_END(virtual_mapped)
/* Do the copies */
@@ -271,7 +282,9 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
popl %edi
popl %ebx
popl %ebp
- RET
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
SYM_CODE_END(swap_pages)
.globl kexec_control_code_size
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index c1d8626c53b6..4809c0dc4eb0 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -13,7 +13,8 @@
#include <asm/unwind_hints.h>
/*
- * Must be relocatable PIC code callable as a C function
+ * Must be relocatable PIC code callable as a C function, in particular
+ * there must be a plain RET and not jump to return thunk.
*/
#define PTR(x) (x << 3)
@@ -105,7 +106,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
/* jump to identity mapped page */
addq $(identity_mapped - relocate_kernel), %r8
pushq %r8
- RET
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
SYM_CODE_END(relocate_kernel)
SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
@@ -200,7 +203,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
xorl %r14d, %r14d
xorl %r15d, %r15d
- RET
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
1:
popq %rdx
@@ -219,7 +224,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
call swap_pages
movq $virtual_mapped, %rax
pushq %rax
- RET
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
SYM_CODE_END(identity_mapped)
SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
@@ -241,7 +248,9 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
popq %r12
popq %rbp
popq %rbx
- RET
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
SYM_CODE_END(virtual_mapped)
/* Do the copies */
@@ -298,7 +307,9 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
lea PAGE_SIZE(%rax), %rsi
jmp 0b
3:
- RET
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
SYM_CODE_END(swap_pages)
.globl kexec_control_code_size
diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
index aa72cefdd5be..aaaba85d6d7f 100644
--- a/arch/x86/kernel/static_call.c
+++ b/arch/x86/kernel/static_call.c
@@ -12,13 +12,21 @@ enum insn_type {
};
/*
+ * ud1 %esp, %ecx - a 3 byte #UD that is unique to trampolines, chosen such
+ * that there is no false-positive trampoline identification while also being a
+ * speculation stop.
+ */
+static const u8 tramp_ud[] = { 0x0f, 0xb9, 0xcc };
+
+/*
* cs cs cs xorl %eax, %eax - a single 5 byte instruction that clears %[er]ax
*/
static const u8 xor5rax[] = { 0x2e, 0x2e, 0x2e, 0x31, 0xc0 };
static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc };
-static void __ref __static_call_transform(void *insn, enum insn_type type, void *func)
+static void __ref __static_call_transform(void *insn, enum insn_type type,
+ void *func, bool modinit)
{
const void *emulate = NULL;
int size = CALL_INSN_SIZE;
@@ -43,14 +51,17 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, void
break;
case RET:
- code = &retinsn;
+ if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
+ code = text_gen_insn(JMP32_INSN_OPCODE, insn, &__x86_return_thunk);
+ else
+ code = &retinsn;
break;
}
if (memcmp(insn, code, size) == 0)
return;
- if (unlikely(system_state == SYSTEM_BOOTING))
+ if (system_state == SYSTEM_BOOTING || modinit)
return text_poke_early(insn, code, size);
text_poke_bp(insn, code, size, emulate);
@@ -60,7 +71,7 @@ static void __static_call_validate(void *insn, bool tail, bool tramp)
{
u8 opcode = *(u8 *)insn;
- if (tramp && memcmp(insn+5, "SCT", 3)) {
+ if (tramp && memcmp(insn+5, tramp_ud, 3)) {
pr_err("trampoline signature fail");
BUG();
}
@@ -104,14 +115,42 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
if (tramp) {
__static_call_validate(tramp, true, true);
- __static_call_transform(tramp, __sc_insn(!func, true), func);
+ __static_call_transform(tramp, __sc_insn(!func, true), func, false);
}
if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) {
__static_call_validate(site, tail, false);
- __static_call_transform(site, __sc_insn(!func, tail), func);
+ __static_call_transform(site, __sc_insn(!func, tail), func, false);
}
mutex_unlock(&text_mutex);
}
EXPORT_SYMBOL_GPL(arch_static_call_transform);
+
+#ifdef CONFIG_RETHUNK
+/*
+ * This is called by apply_returns() to fix up static call trampolines,
+ * specifically ARCH_DEFINE_STATIC_CALL_NULL_TRAMP which is recorded as
+ * having a return trampoline.
+ *
+ * The problem is that static_call() is available before determining
+ * X86_FEATURE_RETHUNK and, by implication, running alternatives.
+ *
+ * This means that __static_call_transform() above can have overwritten the
+ * return trampoline and we now need to fix things up to be consistent.
+ */
+bool __static_call_fixup(void *tramp, u8 op, void *dest)
+{
+ if (memcmp(tramp+5, tramp_ud, 3)) {
+ /* Not a trampoline site, not our problem. */
+ return false;
+ }
+
+ mutex_lock(&text_mutex);
+ if (op == RET_INSN_OPCODE || dest == &__x86_return_thunk)
+ __static_call_transform(tramp, RET, NULL, true);
+ mutex_unlock(&text_mutex);
+
+ return true;
+}
+#endif
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 9487ce8c13ee..15f29053cec4 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -141,7 +141,7 @@ SECTIONS
#ifdef CONFIG_RETPOLINE
__indirect_thunk_start = .;
- *(.text.__x86.indirect_thunk)
+ *(.text.__x86.*)
__indirect_thunk_end = .;
#endif
} :text =0xcccc
@@ -283,6 +283,13 @@ SECTIONS
*(.retpoline_sites)
__retpoline_sites_end = .;
}
+
+ . = ALIGN(8);
+ .return_sites : AT(ADDR(.return_sites) - LOAD_OFFSET) {
+ __return_sites = .;
+ *(.return_sites)
+ __return_sites_end = .;
+ }
#endif
#ifdef CONFIG_X86_KERNEL_IBT
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 89b11e7dca8a..db96bf7d1122 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -325,13 +325,15 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
#define FOP_RET(name) \
__FOP_RET(#name)
-#define FOP_START(op) \
+#define __FOP_START(op, align) \
extern void em_##op(struct fastop *fake); \
asm(".pushsection .text, \"ax\" \n\t" \
".global em_" #op " \n\t" \
- ".align " __stringify(FASTOP_SIZE) " \n\t" \
+ ".align " __stringify(align) " \n\t" \
"em_" #op ":\n\t"
+#define FOP_START(op) __FOP_START(op, FASTOP_SIZE)
+
#define FOP_END \
".popsection")
@@ -435,16 +437,15 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
/*
* Depending on .config the SETcc functions look like:
*
- * ENDBR [4 bytes; CONFIG_X86_KERNEL_IBT]
- * SETcc %al [3 bytes]
- * RET [1 byte]
- * INT3 [1 byte; CONFIG_SLS]
- *
- * Which gives possible sizes 4, 5, 8 or 9. When rounded up to the
- * next power-of-two alignment they become 4, 8 or 16 resp.
+ * ENDBR [4 bytes; CONFIG_X86_KERNEL_IBT]
+ * SETcc %al [3 bytes]
+ * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETHUNK]
+ * INT3 [1 byte; CONFIG_SLS]
*/
-#define SETCC_LENGTH (ENDBR_INSN_SIZE + 4 + IS_ENABLED(CONFIG_SLS))
-#define SETCC_ALIGN (4 << IS_ENABLED(CONFIG_SLS) << HAS_KERNEL_IBT)
+#define RET_LENGTH (1 + (4 * IS_ENABLED(CONFIG_RETHUNK)) + \
+ IS_ENABLED(CONFIG_SLS))
+#define SETCC_LENGTH (ENDBR_INSN_SIZE + 3 + RET_LENGTH)
+#define SETCC_ALIGN (4 << ((SETCC_LENGTH > 4) & 1) << ((SETCC_LENGTH > 8) & 1))
static_assert(SETCC_LENGTH <= SETCC_ALIGN);
#define FOP_SETCC(op) \
@@ -453,9 +454,10 @@ static_assert(SETCC_LENGTH <= SETCC_ALIGN);
#op ": \n\t" \
ASM_ENDBR \
#op " %al \n\t" \
- __FOP_RET(#op)
+ __FOP_RET(#op) \
+ ".skip " __stringify(SETCC_ALIGN) " - (.-" #op "), 0xcc \n\t"
-FOP_START(setcc)
+__FOP_START(setcc, SETCC_ALIGN)
FOP_SETCC(seto)
FOP_SETCC(setno)
FOP_SETCC(setc)
diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S
index dfaeb47fcf2a..723f8534986c 100644
--- a/arch/x86/kvm/svm/vmenter.S
+++ b/arch/x86/kvm/svm/vmenter.S
@@ -111,6 +111,15 @@ SYM_FUNC_START(__svm_vcpu_run)
#endif
/*
+ * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
+ * untrained as soon as we exit the VM and are back to the
+ * kernel. This should be done before re-enabling interrupts
+ * because interrupt handlers won't sanitize 'ret' if the return is
+ * from the kernel.
+ */
+ UNTRAIN_RET
+
+ /*
* Clear all general purpose registers except RSP and RAX to prevent
* speculative use of the guest's values, even those that are reloaded
* via the stack. In theory, an L1 cache miss when restoring registers
@@ -190,6 +199,15 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
#endif
+ /*
+ * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
+ * untrained as soon as we exit the VM and are back to the
+ * kernel. This should be done before re-enabling interrupts
+ * because interrupt handlers won't sanitize RET if the return is
+ * from the kernel.
+ */
+ UNTRAIN_RET
+
pop %_ASM_BX
#ifdef CONFIG_X86_64
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 3f430e218375..c0e24826a86f 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -4,8 +4,8 @@
#include <asm/vmx.h>
-#include "lapic.h"
-#include "x86.h"
+#include "../lapic.h"
+#include "../x86.h"
extern bool __read_mostly enable_vpid;
extern bool __read_mostly flexpriority_enabled;
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index f5cb18e00e78..3a4e895269d7 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -3087,7 +3087,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
}
vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
- vmx->loaded_vmcs->launched);
+ __vmx_vcpu_run_flags(vmx));
if (vmx->msr_autoload.host.nr)
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h
new file mode 100644
index 000000000000..edc3f16cc189
--- /dev/null
+++ b/arch/x86/kvm/vmx/run_flags.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __KVM_X86_VMX_RUN_FLAGS_H
+#define __KVM_X86_VMX_RUN_FLAGS_H
+
+#define VMX_RUN_VMRESUME (1 << 0)
+#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1)
+
+#endif /* __KVM_X86_VMX_RUN_FLAGS_H */
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 435c187927c4..4182c7ffc909 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -1,10 +1,13 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
#include <asm/asm.h>
+#include <asm/asm-offsets.h>
#include <asm/bitsperlong.h>
#include <asm/kvm_vcpu_regs.h>
#include <asm/nospec-branch.h>
+#include <asm/percpu.h>
#include <asm/segment.h>
+#include "run_flags.h"
#define WORD_SIZE (BITS_PER_LONG / 8)
@@ -31,72 +34,11 @@
.section .noinstr.text, "ax"
/**
- * vmx_vmenter - VM-Enter the current loaded VMCS
- *
- * %RFLAGS.ZF: !VMCS.LAUNCHED, i.e. controls VMLAUNCH vs. VMRESUME
- *
- * Returns:
- * %RFLAGS.CF is set on VM-Fail Invalid
- * %RFLAGS.ZF is set on VM-Fail Valid
- * %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit
- *
- * Note that VMRESUME/VMLAUNCH fall-through and return directly if
- * they VM-Fail, whereas a successful VM-Enter + VM-Exit will jump
- * to vmx_vmexit.
- */
-SYM_FUNC_START_LOCAL(vmx_vmenter)
- /* EFLAGS.ZF is set if VMCS.LAUNCHED == 0 */
- je 2f
-
-1: vmresume
- RET
-
-2: vmlaunch
- RET
-
-3: cmpb $0, kvm_rebooting
- je 4f
- RET
-4: ud2
-
- _ASM_EXTABLE(1b, 3b)
- _ASM_EXTABLE(2b, 3b)
-
-SYM_FUNC_END(vmx_vmenter)
-
-/**
- * vmx_vmexit - Handle a VMX VM-Exit
- *
- * Returns:
- * %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit
- *
- * This is vmx_vmenter's partner in crime. On a VM-Exit, control will jump
- * here after hardware loads the host's state, i.e. this is the destination
- * referred to by VMCS.HOST_RIP.
- */
-SYM_FUNC_START(vmx_vmexit)
-#ifdef CONFIG_RETPOLINE
- ALTERNATIVE "jmp .Lvmexit_skip_rsb", "", X86_FEATURE_RETPOLINE
- /* Preserve guest's RAX, it's used to stuff the RSB. */
- push %_ASM_AX
-
- /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
- FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
-
- /* Clear RFLAGS.CF and RFLAGS.ZF to preserve VM-Exit, i.e. !VM-Fail. */
- or $1, %_ASM_AX
-
- pop %_ASM_AX
-.Lvmexit_skip_rsb:
-#endif
- RET
-SYM_FUNC_END(vmx_vmexit)
-
-/**
* __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode
- * @vmx: struct vcpu_vmx * (forwarded to vmx_update_host_rsp)
+ * @vmx: struct vcpu_vmx *
* @regs: unsigned long * (to guest registers)
- * @launched: %true if the VMCS has been launched
+ * @flags: VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH
+ * VMX_RUN_SAVE_SPEC_CTRL: save guest SPEC_CTRL into vmx->spec_ctrl
*
* Returns:
* 0 on VM-Exit, 1 on VM-Fail
@@ -115,24 +57,56 @@ SYM_FUNC_START(__vmx_vcpu_run)
#endif
push %_ASM_BX
+ /* Save @vmx for SPEC_CTRL handling */
+ push %_ASM_ARG1
+
+ /* Save @flags for SPEC_CTRL handling */
+ push %_ASM_ARG3
+
/*
* Save @regs, _ASM_ARG2 may be modified by vmx_update_host_rsp() and
* @regs is needed after VM-Exit to save the guest's register values.
*/
push %_ASM_ARG2
- /* Copy @launched to BL, _ASM_ARG3 is volatile. */
+ /* Copy @flags to BL, _ASM_ARG3 is volatile. */
mov %_ASM_ARG3B, %bl
- /* Adjust RSP to account for the CALL to vmx_vmenter(). */
- lea -WORD_SIZE(%_ASM_SP), %_ASM_ARG2
+ lea (%_ASM_SP), %_ASM_ARG2
call vmx_update_host_rsp
+ ALTERNATIVE "jmp .Lspec_ctrl_done", "", X86_FEATURE_MSR_SPEC_CTRL
+
+ /*
+ * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the
+ * host's, write the MSR.
+ *
+ * IMPORTANT: To avoid RSB underflow attacks and any other nastiness,
+ * there must not be any returns or indirect branches between this code
+ * and vmentry.
+ */
+ mov 2*WORD_SIZE(%_ASM_SP), %_ASM_DI
+ movl VMX_spec_ctrl(%_ASM_DI), %edi
+ movl PER_CPU_VAR(x86_spec_ctrl_current), %esi
+ cmp %edi, %esi
+ je .Lspec_ctrl_done
+ mov $MSR_IA32_SPEC_CTRL, %ecx
+ xor %edx, %edx
+ mov %edi, %eax
+ wrmsr
+
+.Lspec_ctrl_done:
+
+ /*
+ * Since vmentry is serializing on affected CPUs, there's no need for
+ * an LFENCE to stop speculation from skipping the wrmsr.
+ */
+
/* Load @regs to RAX. */
mov (%_ASM_SP), %_ASM_AX
/* Check if vmlaunch or vmresume is needed */
- testb %bl, %bl
+ testb $VMX_RUN_VMRESUME, %bl
/* Load guest registers. Don't clobber flags. */
mov VCPU_RCX(%_ASM_AX), %_ASM_CX
@@ -154,11 +128,37 @@ SYM_FUNC_START(__vmx_vcpu_run)
/* Load guest RAX. This kills the @regs pointer! */
mov VCPU_RAX(%_ASM_AX), %_ASM_AX
- /* Enter guest mode */
- call vmx_vmenter
+ /* Check EFLAGS.ZF from 'testb' above */
+ jz .Lvmlaunch
+
+ /*
+ * After a successful VMRESUME/VMLAUNCH, control flow "magically"
+ * resumes below at 'vmx_vmexit' due to the VMCS HOST_RIP setting.
+ * So this isn't a typical function and objtool needs to be told to
+ * save the unwind state here and restore it below.
+ */
+ UNWIND_HINT_SAVE
+
+/*
+ * If VMRESUME/VMLAUNCH and corresponding vmexit succeed, execution resumes at
+ * the 'vmx_vmexit' label below.
+ */
+.Lvmresume:
+ vmresume
+ jmp .Lvmfail
+
+.Lvmlaunch:
+ vmlaunch
+ jmp .Lvmfail
- /* Jump on VM-Fail. */
- jbe 2f
+ _ASM_EXTABLE(.Lvmresume, .Lfixup)
+ _ASM_EXTABLE(.Lvmlaunch, .Lfixup)
+
+SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
+
+ /* Restore unwind state from before the VMRESUME/VMLAUNCH. */
+ UNWIND_HINT_RESTORE
+ ENDBR
/* Temporarily save guest's RAX. */
push %_ASM_AX
@@ -185,21 +185,23 @@ SYM_FUNC_START(__vmx_vcpu_run)
mov %r15, VCPU_R15(%_ASM_AX)
#endif
- /* Clear RAX to indicate VM-Exit (as opposed to VM-Fail). */
- xor %eax, %eax
+ /* Clear return value to indicate VM-Exit (as opposed to VM-Fail). */
+ xor %ebx, %ebx
+.Lclear_regs:
/*
- * Clear all general purpose registers except RSP and RAX to prevent
+ * Clear all general purpose registers except RSP and RBX to prevent
* speculative use of the guest's values, even those that are reloaded
* via the stack. In theory, an L1 cache miss when restoring registers
* could lead to speculative execution with the guest's values.
* Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially
* free. RSP and RAX are exempt as RSP is restored by hardware during
- * VM-Exit and RAX is explicitly loaded with 0 or 1 to return VM-Fail.
+ * VM-Exit and RBX is explicitly loaded with 0 or 1 to hold the return
+ * value.
*/
-1: xor %ecx, %ecx
+ xor %eax, %eax
+ xor %ecx, %ecx
xor %edx, %edx
- xor %ebx, %ebx
xor %ebp, %ebp
xor %esi, %esi
xor %edi, %edi
@@ -216,8 +218,30 @@ SYM_FUNC_START(__vmx_vcpu_run)
/* "POP" @regs. */
add $WORD_SIZE, %_ASM_SP
- pop %_ASM_BX
+ /*
+ * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before
+ * the first unbalanced RET after vmexit!
+ *
+ * For retpoline or IBRS, RSB filling is needed to prevent poisoned RSB
+ * entries and (in some cases) RSB underflow.
+ *
+ * eIBRS has its own protection against poisoned RSB, so it doesn't
+ * need the RSB filling sequence. But it does need to be enabled
+ * before the first unbalanced RET.
+ */
+
+ FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT
+
+ pop %_ASM_ARG2 /* @flags */
+ pop %_ASM_ARG1 /* @vmx */
+
+ call vmx_spec_ctrl_restore_host
+
+ /* Put return value in AX */
+ mov %_ASM_BX, %_ASM_AX
+
+ pop %_ASM_BX
#ifdef CONFIG_X86_64
pop %r12
pop %r13
@@ -230,9 +254,15 @@ SYM_FUNC_START(__vmx_vcpu_run)
pop %_ASM_BP
RET
- /* VM-Fail. Out-of-line to avoid a taken Jcc after VM-Exit. */
-2: mov $1, %eax
- jmp 1b
+.Lfixup:
+ cmpb $0, kvm_rebooting
+ jne .Lvmfail
+ ud2
+.Lvmfail:
+ /* VM-Fail: set return value to 1 */
+ mov $1, %_ASM_BX
+ jmp .Lclear_regs
+
SYM_FUNC_END(__vmx_vcpu_run)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 3a919e49129b..be7c19374fdd 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -383,9 +383,9 @@ static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx)
if (!vmx->disable_fb_clear)
return;
- rdmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
+ msr = __rdmsr(MSR_IA32_MCU_OPT_CTRL);
msr |= FB_CLEAR_DIS;
- wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
+ native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
/* Cache the MSR value to avoid reading it later */
vmx->msr_ia32_mcu_opt_ctrl = msr;
}
@@ -396,7 +396,7 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx)
return;
vmx->msr_ia32_mcu_opt_ctrl &= ~FB_CLEAR_DIS;
- wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl);
+ native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl);
}
static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
@@ -839,6 +839,24 @@ static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr)
MSR_IA32_SPEC_CTRL);
}
+unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
+{
+ unsigned int flags = 0;
+
+ if (vmx->loaded_vmcs->launched)
+ flags |= VMX_RUN_VMRESUME;
+
+ /*
+ * If writes to the SPEC_CTRL MSR aren't intercepted, the guest is free
+ * to change it directly without causing a vmexit. In that case read
+ * it after vmexit and store it in vmx->spec_ctrl.
+ */
+ if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL)))
+ flags |= VMX_RUN_SAVE_SPEC_CTRL;
+
+ return flags;
+}
+
static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
unsigned long entry, unsigned long exit)
{
@@ -6813,6 +6831,31 @@ void noinstr vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
}
}
+void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx,
+ unsigned int flags)
+{
+ u64 hostval = this_cpu_read(x86_spec_ctrl_current);
+
+ if (!cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL))
+ return;
+
+ if (flags & VMX_RUN_SAVE_SPEC_CTRL)
+ vmx->spec_ctrl = __rdmsr(MSR_IA32_SPEC_CTRL);
+
+ /*
+ * If the guest/host SPEC_CTRL values differ, restore the host value.
+ *
+ * For legacy IBRS, the IBRS bit always needs to be written after
+ * transitioning from a less privileged predictor mode, regardless of
+ * whether the guest/host values differ.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) ||
+ vmx->spec_ctrl != hostval)
+ native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval);
+
+ barrier_nospec();
+}
+
static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
{
switch (to_vmx(vcpu)->exit_reason.basic) {
@@ -6826,7 +6869,8 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
}
static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
- struct vcpu_vmx *vmx)
+ struct vcpu_vmx *vmx,
+ unsigned long flags)
{
guest_state_enter_irqoff();
@@ -6845,7 +6889,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
native_write_cr2(vcpu->arch.cr2);
vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
- vmx->loaded_vmcs->launched);
+ flags);
vcpu->arch.cr2 = native_read_cr2();
@@ -6944,36 +6988,8 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
kvm_wait_lapic_expire(vcpu);
- /*
- * If this vCPU has touched SPEC_CTRL, restore the guest's value if
- * it's non-zero. Since vmentry is serialising on affected CPUs, there
- * is no need to worry about the conditional branch over the wrmsr
- * being speculatively taken.
- */
- x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
-
/* The actual VMENTER/EXIT is in the .noinstr.text section. */
- vmx_vcpu_enter_exit(vcpu, vmx);
-
- /*
- * We do not use IBRS in the kernel. If this vCPU has used the
- * SPEC_CTRL MSR it may have left it on; save the value and
- * turn it off. This is much more efficient than blindly adding
- * it to the atomic save/restore list. Especially as the former
- * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
- *
- * For non-nested case:
- * If the L01 MSR bitmap does not intercept the MSR, then we need to
- * save it.
- *
- * For nested case:
- * If the L02 MSR bitmap does not intercept the MSR, then we need to
- * save it.
- */
- if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL)))
- vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
-
- x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
+ vmx_vcpu_enter_exit(vcpu, vmx, __vmx_vcpu_run_flags(vmx));
/* All fields are clean at this point */
if (static_branch_unlikely(&enable_evmcs)) {
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 8d2342ede0c5..1e7f9453894b 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -8,11 +8,12 @@
#include <asm/intel_pt.h>
#include "capabilities.h"
-#include "kvm_cache_regs.h"
+#include "../kvm_cache_regs.h"
#include "posted_intr.h"
#include "vmcs.h"
#include "vmx_ops.h"
-#include "cpuid.h"
+#include "../cpuid.h"
+#include "run_flags.h"
#define MSR_TYPE_R 1
#define MSR_TYPE_W 2
@@ -404,7 +405,10 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr);
void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu);
void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
-bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);
+void vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, unsigned int flags);
+unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx);
+bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs,
+ unsigned int flags);
int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr);
void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h
index 5e7f41225780..5cfc49ddb1b4 100644
--- a/arch/x86/kvm/vmx/vmx_ops.h
+++ b/arch/x86/kvm/vmx/vmx_ops.h
@@ -8,7 +8,7 @@
#include "evmcs.h"
#include "vmcs.h"
-#include "x86.h"
+#include "../x86.h"
asmlinkage void vmread_error(unsigned long field, bool fault);
__attribute__((regparm(0))) void vmread_error_trampoline(unsigned long field,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1910e1e78b15..26d0cac32f73 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -12631,9 +12631,9 @@ void kvm_arch_end_assignment(struct kvm *kvm)
}
EXPORT_SYMBOL_GPL(kvm_arch_end_assignment);
-bool kvm_arch_has_assigned_device(struct kvm *kvm)
+bool noinstr kvm_arch_has_assigned_device(struct kvm *kvm)
{
- return atomic_read(&kvm->arch.assigned_device_count);
+ return arch_atomic_read(&kvm->arch.assigned_device_count);
}
EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index d83cba364e31..724bbf83eb5b 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -39,7 +39,7 @@ SYM_FUNC_START(__memmove)
/* FSRM implies ERMS => no length checks, do the copy directly */
.Lmemmove_begin_forward:
ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM
- ALTERNATIVE "", __stringify(movq %rdx, %rcx; rep movsb; RET), X86_FEATURE_ERMS
+ ALTERNATIVE "", "jmp .Lmemmove_erms", X86_FEATURE_ERMS
/*
* movsq instruction have many startup latency
@@ -205,6 +205,11 @@ SYM_FUNC_START(__memmove)
movb %r11b, (%rdi)
13:
RET
+
+.Lmemmove_erms:
+ movq %rdx, %rcx
+ rep movsb
+ RET
SYM_FUNC_END(__memmove)
EXPORT_SYMBOL(__memmove)
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index b2b2366885a2..073289a55f84 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -33,9 +33,9 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL)
UNWIND_HINT_EMPTY
ANNOTATE_NOENDBR
- ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
- __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \
- __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE
+ ALTERNATIVE_2 __stringify(RETPOLINE \reg), \
+ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE, \
+ __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), ALT_NOT(X86_FEATURE_RETPOLINE)
.endm
@@ -67,3 +67,76 @@ SYM_CODE_END(__x86_indirect_thunk_array)
#define GEN(reg) EXPORT_THUNK(reg)
#include <asm/GEN-for-each-reg.h>
#undef GEN
+
+/*
+ * This function name is magical and is used by -mfunction-return=thunk-extern
+ * for the compiler to generate JMPs to it.
+ */
+#ifdef CONFIG_RETHUNK
+
+ .section .text.__x86.return_thunk
+
+/*
+ * Safety details here pertain to the AMD Zen{1,2} microarchitecture:
+ * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for
+ * alignment within the BTB.
+ * 2) The instruction at zen_untrain_ret must contain, and not
+ * end with, the 0xc3 byte of the RET.
+ * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread
+ * from re-poisioning the BTB prediction.
+ */
+ .align 64
+ .skip 63, 0xcc
+SYM_FUNC_START_NOALIGN(zen_untrain_ret);
+
+ /*
+ * As executed from zen_untrain_ret, this is:
+ *
+ * TEST $0xcc, %bl
+ * LFENCE
+ * JMP __x86_return_thunk
+ *
+ * Executing the TEST instruction has a side effect of evicting any BTB
+ * prediction (potentially attacker controlled) attached to the RET, as
+ * __x86_return_thunk + 1 isn't an instruction boundary at the moment.
+ */
+ .byte 0xf6
+
+ /*
+ * As executed from __x86_return_thunk, this is a plain RET.
+ *
+ * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8.
+ *
+ * We subsequently jump backwards and architecturally execute the RET.
+ * This creates a correct BTB prediction (type=ret), but in the
+ * meantime we suffer Straight Line Speculation (because the type was
+ * no branch) which is halted by the INT3.
+ *
+ * With SMT enabled and STIBP active, a sibling thread cannot poison
+ * RET's prediction to a type of its choice, but can evict the
+ * prediction due to competitive sharing. If the prediction is
+ * evicted, __x86_return_thunk will suffer Straight Line Speculation
+ * which will be contained safely by the INT3.
+ */
+SYM_INNER_LABEL(__x86_return_thunk, SYM_L_GLOBAL)
+ ret
+ int3
+SYM_CODE_END(__x86_return_thunk)
+
+ /*
+ * Ensure the TEST decoding / BTB invalidation is complete.
+ */
+ lfence
+
+ /*
+ * Jump back and execute the RET in the middle of the TEST instruction.
+ * INT3 is for SLS protection.
+ */
+ jmp __x86_return_thunk
+ int3
+SYM_FUNC_END(zen_untrain_ret)
+__EXPORT_THUNK(zen_untrain_ret)
+
+EXPORT_SYMBOL(__x86_return_thunk)
+
+#endif /* CONFIG_RETHUNK */
diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S
index 3d1dba05fce4..9de3d900bc92 100644
--- a/arch/x86/mm/mem_encrypt_boot.S
+++ b/arch/x86/mm/mem_encrypt_boot.S
@@ -65,7 +65,10 @@ SYM_FUNC_START(sme_encrypt_execute)
movq %rbp, %rsp /* Restore original stack pointer */
pop %rbp
- RET
+ /* Offset to __x86_return_thunk would be wrong here */
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
SYM_FUNC_END(sme_encrypt_execute)
SYM_FUNC_START(__enc_copy)
@@ -151,6 +154,9 @@ SYM_FUNC_START(__enc_copy)
pop %r12
pop %r15
- RET
+ /* Offset to __x86_return_thunk would be wrong here */
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
.L__enc_copy_end:
SYM_FUNC_END(__enc_copy)
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index c98b8c0ed3b8..b808c9a80d1b 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -412,16 +412,30 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
{
u8 *prog = *pprog;
-#ifdef CONFIG_RETPOLINE
if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
EMIT_LFENCE();
EMIT2(0xFF, 0xE0 + reg);
} else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
OPTIMIZER_HIDE_VAR(reg);
emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip);
- } else
-#endif
- EMIT2(0xFF, 0xE0 + reg);
+ } else {
+ EMIT2(0xFF, 0xE0 + reg);
+ }
+
+ *pprog = prog;
+}
+
+static void emit_return(u8 **pprog, u8 *ip)
+{
+ u8 *prog = *pprog;
+
+ if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) {
+ emit_jump(&prog, &__x86_return_thunk, ip);
+ } else {
+ EMIT1(0xC3); /* ret */
+ if (IS_ENABLED(CONFIG_SLS))
+ EMIT1(0xCC); /* int3 */
+ }
*pprog = prog;
}
@@ -1686,7 +1700,7 @@ emit_jmp:
ctx->cleanup_addr = proglen;
pop_callee_regs(&prog, callee_regs_used);
EMIT1(0xC9); /* leave */
- EMIT1(0xC3); /* ret */
+ emit_return(&prog, image + addrs[i - 1] + (prog - temp));
break;
default:
@@ -2189,7 +2203,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
if (flags & BPF_TRAMP_F_SKIP_FRAME)
/* skip our return address and return to parent */
EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
- EMIT1(0xC3); /* ret */
+ emit_return(&prog, prog);
/* Make sure the trampoline generation logic doesn't overflow */
if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) {
ret = -EFAULT;
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 81aa46f770c5..cfa99e8f054b 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -918,7 +918,7 @@ void xen_enable_sysenter(void)
if (!boot_cpu_has(sysenter_feature))
return;
- ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target);
+ ret = register_callback(CALLBACKTYPE_sysenter, xen_entry_SYSENTER_compat);
if(ret != 0)
setup_clear_cpu_cap(sysenter_feature);
}
@@ -927,7 +927,7 @@ void xen_enable_syscall(void)
{
int ret;
- ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target);
+ ret = register_callback(CALLBACKTYPE_syscall, xen_entry_SYSCALL_64);
if (ret != 0) {
printk(KERN_ERR "Failed to set syscall callback: %d\n", ret);
/* Pretty fatal; 64-bit userspace has no other
@@ -936,7 +936,7 @@ void xen_enable_syscall(void)
if (boot_cpu_has(X86_FEATURE_SYSCALL32)) {
ret = register_callback(CALLBACKTYPE_syscall32,
- xen_syscall32_target);
+ xen_entry_SYSCALL_compat);
if (ret != 0)
setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
}
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index caa9bc2fa100..6b4fdf6b9542 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -121,7 +121,7 @@ SYM_FUNC_END(xen_read_cr2_direct);
.macro xen_pv_trap name
SYM_CODE_START(xen_\name)
- UNWIND_HINT_EMPTY
+ UNWIND_HINT_ENTRY
ENDBR
pop %rcx
pop %r11
@@ -234,8 +234,8 @@ SYM_CODE_END(xenpv_restore_regs_and_return_to_usermode)
*/
/* Normal 64-bit system call target */
-SYM_CODE_START(xen_syscall_target)
- UNWIND_HINT_EMPTY
+SYM_CODE_START(xen_entry_SYSCALL_64)
+ UNWIND_HINT_ENTRY
ENDBR
popq %rcx
popq %r11
@@ -249,13 +249,13 @@ SYM_CODE_START(xen_syscall_target)
movq $__USER_CS, 1*8(%rsp)
jmp entry_SYSCALL_64_after_hwframe
-SYM_CODE_END(xen_syscall_target)
+SYM_CODE_END(xen_entry_SYSCALL_64)
#ifdef CONFIG_IA32_EMULATION
/* 32-bit compat syscall target */
-SYM_CODE_START(xen_syscall32_target)
- UNWIND_HINT_EMPTY
+SYM_CODE_START(xen_entry_SYSCALL_compat)
+ UNWIND_HINT_ENTRY
ENDBR
popq %rcx
popq %r11
@@ -269,11 +269,11 @@ SYM_CODE_START(xen_syscall32_target)
movq $__USER32_CS, 1*8(%rsp)
jmp entry_SYSCALL_compat_after_hwframe
-SYM_CODE_END(xen_syscall32_target)
+SYM_CODE_END(xen_entry_SYSCALL_compat)
/* 32-bit compat sysenter target */
-SYM_CODE_START(xen_sysenter_target)
- UNWIND_HINT_EMPTY
+SYM_CODE_START(xen_entry_SYSENTER_compat)
+ UNWIND_HINT_ENTRY
ENDBR
/*
* NB: Xen is polite and clears TF from EFLAGS for us. This means
@@ -291,19 +291,19 @@ SYM_CODE_START(xen_sysenter_target)
movq $__USER32_CS, 1*8(%rsp)
jmp entry_SYSENTER_compat_after_hwframe
-SYM_CODE_END(xen_sysenter_target)
+SYM_CODE_END(xen_entry_SYSENTER_compat)
#else /* !CONFIG_IA32_EMULATION */
-SYM_CODE_START(xen_syscall32_target)
-SYM_CODE_START(xen_sysenter_target)
- UNWIND_HINT_EMPTY
+SYM_CODE_START(xen_entry_SYSCALL_compat)
+SYM_CODE_START(xen_entry_SYSENTER_compat)
+ UNWIND_HINT_ENTRY
ENDBR
lea 16(%rsp), %rsp /* strip %rcx, %r11 */
mov $-ENOSYS, %rax
pushq $0
jmp hypercall_iret
-SYM_CODE_END(xen_sysenter_target)
-SYM_CODE_END(xen_syscall32_target)
+SYM_CODE_END(xen_entry_SYSENTER_compat)
+SYM_CODE_END(xen_entry_SYSCALL_compat)
#endif /* CONFIG_IA32_EMULATION */
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 13af6fe453e3..ffaa62167f6e 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -26,6 +26,7 @@ SYM_CODE_START(hypercall_page)
.rept (PAGE_SIZE / 32)
UNWIND_HINT_FUNC
ANNOTATE_NOENDBR
+ ANNOTATE_UNRET_SAFE
ret
/*
* Xen will write the hypercall page, and sort out ENDBR.
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index fd0fec6e92f4..9a8bb972193d 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -10,10 +10,10 @@
/* These are code, but not functions. Defined in entry.S */
extern const char xen_failsafe_callback[];
-void xen_sysenter_target(void);
+void xen_entry_SYSENTER_compat(void);
#ifdef CONFIG_X86_64
-void xen_syscall_target(void);
-void xen_syscall32_target(void);
+void xen_entry_SYSCALL_64(void);
+void xen_entry_SYSCALL_compat(void);
#endif
extern void *xen_initial_gdt;
diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
index 0e3ed5eb367b..0cb20324da16 100644
--- a/drivers/amba/bus.c
+++ b/drivers/amba/bus.c
@@ -493,13 +493,8 @@ static int amba_device_try_add(struct amba_device *dev, struct resource *parent)
goto skip_probe;
ret = amba_read_periphid(dev);
- if (ret) {
- if (ret != -EPROBE_DEFER) {
- amba_device_put(dev);
- goto err_out;
- }
+ if (ret)
goto err_release;
- }
skip_probe:
ret = device_add(&dev->dev);
@@ -546,6 +541,7 @@ static int amba_deferred_retry(void)
continue;
list_del_init(&ddev->node);
+ amba_device_put(ddev->dev);
kfree(ddev);
}
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index a97776ea9d99..4c98849577d4 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -570,6 +570,12 @@ ssize_t __weak cpu_show_mmio_stale_data(struct device *dev,
return sysfs_emit(buf, "Not affected\n");
}
+ssize_t __weak cpu_show_retbleed(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "Not affected\n");
+}
+
static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
@@ -580,6 +586,7 @@ static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL);
static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL);
static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL);
static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL);
+static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL);
static struct attribute *cpu_root_vulnerabilities_attrs[] = {
&dev_attr_meltdown.attr,
@@ -592,6 +599,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
&dev_attr_itlb_multihit.attr,
&dev_attr_srbds.attr,
&dev_attr_mmio_stale_data.attr,
+ &dev_attr_retbleed.attr,
NULL
};
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index 0cce6e4ec946..205acb2c744d 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -343,7 +343,7 @@ void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context,
if (old->context != context)
continue;
- dma_resv_list_set(list, i, replacement, usage);
+ dma_resv_list_set(list, i, dma_fence_get(replacement), usage);
dma_fence_put(old);
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 17c9bbe0cbc5..4dfd6724b3ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -1528,6 +1528,21 @@ bool amdgpu_crtc_get_scanout_position(struct drm_crtc *crtc,
stime, etime, mode);
}
+static bool
+amdgpu_display_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_fb_helper *fb_helper = dev->fb_helper;
+
+ if (!fb_helper || !fb_helper->buffer)
+ return false;
+
+ if (gem_to_amdgpu_bo(fb_helper->buffer->gem) != robj)
+ return false;
+
+ return true;
+}
+
int amdgpu_display_suspend_helper(struct amdgpu_device *adev)
{
struct drm_device *dev = adev_to_drm(adev);
@@ -1563,10 +1578,12 @@ int amdgpu_display_suspend_helper(struct amdgpu_device *adev)
continue;
}
robj = gem_to_amdgpu_bo(fb->obj[0]);
- r = amdgpu_bo_reserve(robj, true);
- if (r == 0) {
- amdgpu_bo_unpin(robj);
- amdgpu_bo_unreserve(robj);
+ if (!amdgpu_display_robj_is_fb(adev, robj)) {
+ r = amdgpu_bo_reserve(robj, true);
+ if (r == 0) {
+ amdgpu_bo_unpin(robj);
+ amdgpu_bo_unreserve(robj);
+ }
}
}
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
index 576849e95296..108e8e8a1a36 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
@@ -496,7 +496,8 @@ static int amdgpu_vkms_sw_init(void *handle)
adev_to_drm(adev)->mode_config.max_height = YRES_MAX;
adev_to_drm(adev)->mode_config.preferred_depth = 24;
- adev_to_drm(adev)->mode_config.prefer_shadow = 1;
+ /* disable prefer shadow for now due to hibernation issues */
+ adev_to_drm(adev)->mode_config.prefer_shadow = 0;
adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index 288fce7dc0ed..9c964cd3b5d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -2796,7 +2796,8 @@ static int dce_v10_0_sw_init(void *handle)
adev_to_drm(adev)->mode_config.max_height = 16384;
adev_to_drm(adev)->mode_config.preferred_depth = 24;
- adev_to_drm(adev)->mode_config.prefer_shadow = 1;
+ /* disable prefer shadow for now due to hibernation issues */
+ adev_to_drm(adev)->mode_config.prefer_shadow = 0;
adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index cbe5250b31cb..e0ad9f27dc3f 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -2914,7 +2914,8 @@ static int dce_v11_0_sw_init(void *handle)
adev_to_drm(adev)->mode_config.max_height = 16384;
adev_to_drm(adev)->mode_config.preferred_depth = 24;
- adev_to_drm(adev)->mode_config.prefer_shadow = 1;
+ /* disable prefer shadow for now due to hibernation issues */
+ adev_to_drm(adev)->mode_config.prefer_shadow = 0;
adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index 982855e6cf52..3caf6f386042 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -2673,7 +2673,8 @@ static int dce_v6_0_sw_init(void *handle)
adev_to_drm(adev)->mode_config.max_width = 16384;
adev_to_drm(adev)->mode_config.max_height = 16384;
adev_to_drm(adev)->mode_config.preferred_depth = 24;
- adev_to_drm(adev)->mode_config.prefer_shadow = 1;
+ /* disable prefer shadow for now due to hibernation issues */
+ adev_to_drm(adev)->mode_config.prefer_shadow = 0;
adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index 84440741c60b..7c75df5bffed 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -2693,7 +2693,8 @@ static int dce_v8_0_sw_init(void *handle)
adev_to_drm(adev)->mode_config.max_height = 16384;
adev_to_drm(adev)->mode_config.preferred_depth = 24;
- adev_to_drm(adev)->mode_config.prefer_shadow = 1;
+ /* disable prefer shadow for now due to hibernation issues */
+ adev_to_drm(adev)->mode_config.prefer_shadow = 0;
adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig
index b4029c0d5d8c..0ba0598eba20 100644
--- a/drivers/gpu/drm/amd/display/Kconfig
+++ b/drivers/gpu/drm/amd/display/Kconfig
@@ -6,7 +6,7 @@ config DRM_AMD_DC
bool "AMD DC - Enable new display engine"
default y
select SND_HDA_COMPONENT if SND_HDA_CORE
- select DRM_AMD_DC_DCN if (X86 || PPC64) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS)
+ select DRM_AMD_DC_DCN if X86 && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS)
help
Choose this option if you want to use the new display engine
support for AMDGPU. This adds required support for Vega and
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 9dd2e0601ea8..1c2984bbda51 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -3822,7 +3822,8 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev)
adev_to_drm(adev)->mode_config.max_height = 16384;
adev_to_drm(adev)->mode_config.preferred_depth = 24;
- adev_to_drm(adev)->mode_config.prefer_shadow = 1;
+ /* disable prefer shadow for now due to hibernation issues */
+ adev_to_drm(adev)->mode_config.prefer_shadow = 0;
/* indicates support for immediate flip */
adev_to_drm(adev)->mode_config.async_page_flip = true;
diff --git a/drivers/gpu/drm/bridge/fsl-ldb.c b/drivers/gpu/drm/bridge/fsl-ldb.c
index b2675c769a55..4b503c544256 100644
--- a/drivers/gpu/drm/bridge/fsl-ldb.c
+++ b/drivers/gpu/drm/bridge/fsl-ldb.c
@@ -74,22 +74,6 @@ static int fsl_ldb_attach(struct drm_bridge *bridge,
bridge, flags);
}
-static int fsl_ldb_atomic_check(struct drm_bridge *bridge,
- struct drm_bridge_state *bridge_state,
- struct drm_crtc_state *crtc_state,
- struct drm_connector_state *conn_state)
-{
- /* Invert DE signal polarity. */
- bridge_state->input_bus_cfg.flags &= ~(DRM_BUS_FLAG_DE_LOW |
- DRM_BUS_FLAG_DE_HIGH);
- if (bridge_state->output_bus_cfg.flags & DRM_BUS_FLAG_DE_LOW)
- bridge_state->input_bus_cfg.flags |= DRM_BUS_FLAG_DE_HIGH;
- else if (bridge_state->output_bus_cfg.flags & DRM_BUS_FLAG_DE_HIGH)
- bridge_state->input_bus_cfg.flags |= DRM_BUS_FLAG_DE_LOW;
-
- return 0;
-}
-
static void fsl_ldb_atomic_enable(struct drm_bridge *bridge,
struct drm_bridge_state *old_bridge_state)
{
@@ -153,7 +137,7 @@ static void fsl_ldb_atomic_enable(struct drm_bridge *bridge,
reg = LDB_CTRL_CH0_ENABLE;
if (fsl_ldb->lvds_dual_link)
- reg |= LDB_CTRL_CH1_ENABLE;
+ reg |= LDB_CTRL_CH1_ENABLE | LDB_CTRL_SPLIT_MODE;
if (lvds_format_24bpp) {
reg |= LDB_CTRL_CH0_DATA_WIDTH;
@@ -233,7 +217,7 @@ fsl_ldb_mode_valid(struct drm_bridge *bridge,
{
struct fsl_ldb *fsl_ldb = to_fsl_ldb(bridge);
- if (mode->clock > (fsl_ldb->lvds_dual_link ? 80000 : 160000))
+ if (mode->clock > (fsl_ldb->lvds_dual_link ? 160000 : 80000))
return MODE_CLOCK_HIGH;
return MODE_OK;
@@ -241,7 +225,6 @@ fsl_ldb_mode_valid(struct drm_bridge *bridge,
static const struct drm_bridge_funcs funcs = {
.attach = fsl_ldb_attach,
- .atomic_check = fsl_ldb_atomic_check,
.atomic_enable = fsl_ldb_atomic_enable,
.atomic_disable = fsl_ldb_atomic_disable,
.atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state,
diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c
index df87ba99a87c..d4e0f2e85548 100644
--- a/drivers/gpu/drm/drm_panel_orientation_quirks.c
+++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c
@@ -286,6 +286,21 @@ static const struct dmi_system_id orientation_data[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X9"),
},
.driver_data = (void *)&lcd1200x1920_rightside_up,
+ }, { /* Lenovo Yoga Tablet 2 830F / 830L */
+ .matches = {
+ /*
+ * Note this also matches the Lenovo Yoga Tablet 2 1050F/L
+ * since that uses the same mainboard. The resolution match
+ * will limit this to only matching on the 830F/L. Neither has
+ * any external video outputs so those are not a concern.
+ */
+ DMI_MATCH(DMI_SYS_VENDOR, "Intel Corp."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "VALLEYVIEW C0 PLATFORM"),
+ DMI_MATCH(DMI_BOARD_NAME, "BYT-T FFD8"),
+ /* Partial match on beginning of BIOS version */
+ DMI_MATCH(DMI_BIOS_VERSION, "BLADE_21"),
+ },
+ .driver_data = (void *)&lcd1200x1920_rightside_up,
}, { /* OneGX1 Pro */
.matches = {
DMI_EXACT_MATCH(DMI_SYS_VENDOR, "SYSTEM_MANUFACTURER"),
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 061b277e5ce7..14d2a64193b2 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -839,6 +839,7 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo
ret = drm_connector_init(dev, connector, &intel_dp_mst_connector_funcs,
DRM_MODE_CONNECTOR_DisplayPort);
if (ret) {
+ drm_dp_mst_put_port_malloc(port);
intel_connector_free(intel_connector);
return NULL;
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index f0d7b57b741e..2ff55b9994bc 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -162,6 +162,15 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
u8 rev = INTEL_REVID(i915);
int i;
+ /*
+ * The only difference between the ADL GuC FWs is the HWConfig support.
+ * ADL-N does not support HWConfig, so we should use the same binary as
+ * ADL-S, otherwise the GuC might attempt to fetch a config table that
+ * does not exist.
+ */
+ if (IS_ADLP_N(i915))
+ p = INTEL_ALDERLAKE_S;
+
GEM_BUG_ON(uc_fw->type >= ARRAY_SIZE(blobs_all));
fw_blobs = blobs_all[uc_fw->type].blobs;
fw_count = blobs_all[uc_fw->type].count;
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 0bffb70b3c5f..04d12f278f57 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -1637,10 +1637,10 @@ static void force_unbind(struct i915_vma *vma)
GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
}
-static void release_references(struct i915_vma *vma, bool vm_ddestroy)
+static void release_references(struct i915_vma *vma, struct intel_gt *gt,
+ bool vm_ddestroy)
{
struct drm_i915_gem_object *obj = vma->obj;
- struct intel_gt *gt = vma->vm->gt;
GEM_BUG_ON(i915_vma_is_active(vma));
@@ -1695,11 +1695,12 @@ void i915_vma_destroy_locked(struct i915_vma *vma)
force_unbind(vma);
list_del_init(&vma->vm_link);
- release_references(vma, false);
+ release_references(vma, vma->vm->gt, false);
}
void i915_vma_destroy(struct i915_vma *vma)
{
+ struct intel_gt *gt;
bool vm_ddestroy;
mutex_lock(&vma->vm->mutex);
@@ -1707,8 +1708,11 @@ void i915_vma_destroy(struct i915_vma *vma)
list_del_init(&vma->vm_link);
vm_ddestroy = vma->vm_ddestroy;
vma->vm_ddestroy = false;
+
+ /* vma->vm may be freed when releasing vma->vm->mutex. */
+ gt = vma->vm->gt;
mutex_unlock(&vma->vm->mutex);
- release_references(vma, vm_ddestroy);
+ release_references(vma, gt, vm_ddestroy);
}
void i915_vma_parked(struct intel_gt *gt)
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index 087e69b98d06..b1e6d238674f 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -433,8 +433,8 @@ static int panfrost_ioctl_madvise(struct drm_device *dev, void *data,
if (args->retained) {
if (args->madv == PANFROST_MADV_DONTNEED)
- list_add_tail(&bo->base.madv_list,
- &pfdev->shrinker_list);
+ list_move_tail(&bo->base.madv_list,
+ &pfdev->shrinker_list);
else if (args->madv == PANFROST_MADV_WILLNEED)
list_del_init(&bo->base.madv_list);
}
diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
index d3f82b26a631..b285a8001b1d 100644
--- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
@@ -518,7 +518,7 @@ err_map:
err_pages:
drm_gem_shmem_put_pages(&bo->base);
err_bo:
- drm_gem_object_put(&bo->base.base);
+ panfrost_gem_mapping_put(bomapping);
return ret;
}
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
index 67d38f53d3e5..13ed33e74457 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
@@ -23,6 +23,14 @@
#include <drm/drm_probe_helper.h>
#include <drm/drm_vblank.h>
+#if defined(CONFIG_ARM_DMA_USE_IOMMU)
+#include <asm/dma-iommu.h>
+#else
+#define arm_iommu_detach_device(...) ({ })
+#define arm_iommu_release_mapping(...) ({ })
+#define to_dma_iommu_mapping(dev) NULL
+#endif
+
#include "rockchip_drm_drv.h"
#include "rockchip_drm_fb.h"
#include "rockchip_drm_gem.h"
@@ -49,6 +57,15 @@ int rockchip_drm_dma_attach_device(struct drm_device *drm_dev,
if (!private->domain)
return 0;
+ if (IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)) {
+ struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
+
+ if (mapping) {
+ arm_iommu_detach_device(dev);
+ arm_iommu_release_mapping(mapping);
+ }
+ }
+
ret = iommu_attach_device(private->domain, dev);
if (ret) {
DRM_DEV_ERROR(dev, "Failed to attach iommu device\n");
diff --git a/drivers/gpu/drm/solomon/ssd130x.c b/drivers/gpu/drm/solomon/ssd130x.c
index 08394444dd6e..f4886e66ff34 100644
--- a/drivers/gpu/drm/solomon/ssd130x.c
+++ b/drivers/gpu/drm/solomon/ssd130x.c
@@ -350,7 +350,7 @@ static int ssd130x_init(struct ssd130x_device *ssd130x)
/* Set precharge period in number of ticks from the internal clock */
precharge = (SSD130X_SET_PRECHARGE_PERIOD1_SET(ssd130x->prechargep1) |
- SSD130X_SET_PRECHARGE_PERIOD1_SET(ssd130x->prechargep2));
+ SSD130X_SET_PRECHARGE_PERIOD2_SET(ssd130x->prechargep2));
ret = ssd130x_write_cmd(ssd130x, 2, SSD130X_SET_PRECHARGE_PERIOD, precharge);
if (ret < 0)
return ret;
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 424ef470223d..f5c6802aa6c3 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -47,11 +47,13 @@
#include <linux/tick.h>
#include <trace/events/power.h>
#include <linux/sched.h>
+#include <linux/sched/smt.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/moduleparam.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
+#include <asm/nospec-branch.h>
#include <asm/mwait.h>
#include <asm/msr.h>
@@ -106,6 +108,12 @@ static unsigned int mwait_substates __initdata;
#define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15)
/*
+ * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE
+ * above.
+ */
+#define CPUIDLE_FLAG_IBRS BIT(16)
+
+/*
* MWAIT takes an 8-bit "hint" in EAX "suggesting"
* the C-state (top nibble) and sub-state (bottom nibble)
* 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
@@ -159,6 +167,24 @@ static __cpuidle int intel_idle_irq(struct cpuidle_device *dev,
return ret;
}
+static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ bool smt_active = sched_smt_active();
+ u64 spec_ctrl = spec_ctrl_current();
+ int ret;
+
+ if (smt_active)
+ wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+
+ ret = __intel_idle(dev, drv, index);
+
+ if (smt_active)
+ wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);
+
+ return ret;
+}
+
/**
* intel_idle_s2idle - Ask the processor to enter the given idle state.
* @dev: cpuidle device of the target CPU.
@@ -680,7 +706,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
{
.name = "C6",
.desc = "MWAIT 0x20",
- .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 85,
.target_residency = 200,
.enter = &intel_idle,
@@ -688,7 +714,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
{
.name = "C7s",
.desc = "MWAIT 0x33",
- .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 124,
.target_residency = 800,
.enter = &intel_idle,
@@ -696,7 +722,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
{
.name = "C8",
.desc = "MWAIT 0x40",
- .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 200,
.target_residency = 800,
.enter = &intel_idle,
@@ -704,7 +730,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
{
.name = "C9",
.desc = "MWAIT 0x50",
- .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 480,
.target_residency = 5000,
.enter = &intel_idle,
@@ -712,7 +738,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
{
.name = "C10",
.desc = "MWAIT 0x60",
- .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 890,
.target_residency = 5000,
.enter = &intel_idle,
@@ -741,7 +767,7 @@ static struct cpuidle_state skx_cstates[] __initdata = {
{
.name = "C6",
.desc = "MWAIT 0x20",
- .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 133,
.target_residency = 600,
.enter = &intel_idle,
@@ -1819,6 +1845,12 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE)
drv->states[drv->state_count].enter = intel_idle_irq;
+ if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
+ cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) {
+ WARN_ON_ONCE(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE);
+ drv->states[drv->state_count].enter = intel_idle_ibrs;
+ }
+
if ((disabled_states_mask & BIT(drv->state_count)) ||
((icpu->use_acpi || force_use_acpi) &&
intel_idle_off_by_default(mwait_hint) &&
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 61e71c1154be..e60b06f2ac22 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -549,6 +549,16 @@ static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
if (!iommu_group)
return ERR_PTR(-EINVAL);
+ /*
+ * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
+ * restore cache coherency. It has to be checked here because it is only
+ * valid for cases where we are using iommu groups.
+ */
+ if (!iommu_capable(dev->bus, IOMMU_CAP_CACHE_COHERENCY)) {
+ iommu_group_put(iommu_group);
+ return ERR_PTR(-EINVAL);
+ }
+
group = vfio_group_get_from_iommu(iommu_group);
if (!group)
group = vfio_create_group(iommu_group, VFIO_IOMMU);
@@ -601,13 +611,6 @@ static int __vfio_register_dev(struct vfio_device *device,
int vfio_register_group_dev(struct vfio_device *device)
{
- /*
- * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
- * restore cache coherency.
- */
- if (!iommu_capable(device->dev->bus, IOMMU_CAP_CACHE_COHERENCY))
- return -EINVAL;
-
return __vfio_register_dev(device,
vfio_group_find_or_alloc(device->dev));
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 05e0c4a5affd..0172f75e051a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7681,7 +7681,19 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
em->block_start == EXTENT_MAP_INLINE) {
free_extent_map(em);
- ret = -ENOTBLK;
+ /*
+ * If we are in a NOWAIT context, return -EAGAIN in order to
+ * fallback to buffered IO. This is not only because we can
+ * block with buffered IO (no support for NOWAIT semantics at
+ * the moment) but also to avoid returning short reads to user
+ * space - this happens if we were able to read some data from
+ * previous non-compressed extents and then when we fallback to
+ * buffered IO, at btrfs_file_read_iter() by calling
+ * filemap_read(), we fail to fault in pages for the read buffer,
+ * in which case filemap_read() returns a short read (the number
+ * of bytes previously read is > 0, so it does not return -EFAULT).
+ */
+ ret = (flags & IOMAP_NOWAIT) ? -EAGAIN : -ENOTBLK;
goto unlock_err;
}
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 79e8c8cd75ed..d99026df6f67 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1735,12 +1735,14 @@ static int read_zone_info(struct btrfs_fs_info *fs_info, u64 logical,
ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
&mapped_length, &bioc);
if (ret || !bioc || mapped_length < PAGE_SIZE) {
- btrfs_put_bioc(bioc);
- return -EIO;
+ ret = -EIO;
+ goto out_put_bioc;
}
- if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK)
- return -EINVAL;
+ if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
+ ret = -EINVAL;
+ goto out_put_bioc;
+ }
nofs_flag = memalloc_nofs_save();
nmirrors = (int)bioc->num_stripes;
@@ -1759,7 +1761,8 @@ static int read_zone_info(struct btrfs_fs_info *fs_info, u64 logical,
break;
}
memalloc_nofs_restore(nofs_flag);
-
+out_put_bioc:
+ btrfs_put_bioc(bioc);
return ret;
}
@@ -1885,7 +1888,6 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
struct map_lookup *map;
- bool need_zone_finish;
int ret = 0;
int i;
@@ -1942,12 +1944,6 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
}
}
- /*
- * The block group is not fully allocated, so not fully written yet. We
- * need to send ZONE_FINISH command to free up an active zone.
- */
- need_zone_finish = !btrfs_zoned_bg_is_full(block_group);
-
block_group->zone_is_active = 0;
block_group->alloc_offset = block_group->zone_capacity;
block_group->free_space_ctl->free_space = 0;
@@ -1963,15 +1959,13 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
if (device->zone_info->max_active_zones == 0)
continue;
- if (need_zone_finish) {
- ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
- physical >> SECTOR_SHIFT,
- device->zone_info->zone_size >> SECTOR_SHIFT,
- GFP_NOFS);
+ ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
+ physical >> SECTOR_SHIFT,
+ device->zone_info->zone_size >> SECTOR_SHIFT,
+ GFP_NOFS);
- if (ret)
- return ret;
- }
+ if (ret)
+ return ret;
btrfs_dev_clear_active_zone(device, physical);
}
diff --git a/fs/exec.c b/fs/exec.c
index 0989fb8472a1..778123259e42 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1301,7 +1301,7 @@ int begin_new_exec(struct linux_binprm * bprm)
bprm->mm = NULL;
#ifdef CONFIG_POSIX_TIMERS
- exit_itimers(me->signal);
+ exit_itimers(me);
flush_itimer_signals();
#endif
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 0a22a2faf552..e1c4617de771 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -176,7 +176,7 @@ nlm_delete_file(struct nlm_file *file)
}
}
-static int nlm_unlock_files(struct nlm_file *file)
+static int nlm_unlock_files(struct nlm_file *file, fl_owner_t owner)
{
struct file_lock lock;
@@ -184,6 +184,7 @@ static int nlm_unlock_files(struct nlm_file *file)
lock.fl_type = F_UNLCK;
lock.fl_start = 0;
lock.fl_end = OFFSET_MAX;
+ lock.fl_owner = owner;
if (file->f_file[O_RDONLY] &&
vfs_lock_file(file->f_file[O_RDONLY], F_SETLK, &lock, NULL))
goto out_err;
@@ -225,7 +226,7 @@ again:
if (match(lockhost, host)) {
spin_unlock(&flctx->flc_lock);
- if (nlm_unlock_files(file))
+ if (nlm_unlock_files(file, fl->fl_owner))
return 1;
goto again;
}
@@ -282,11 +283,10 @@ nlm_file_inuse(struct nlm_file *file)
static void nlm_close_files(struct nlm_file *file)
{
- struct file *f;
-
- for (f = file->f_file[0]; f <= file->f_file[1]; f++)
- if (f)
- nlmsvc_ops->fclose(f);
+ if (file->f_file[O_RDONLY])
+ nlmsvc_ops->fclose(file->f_file[O_RDONLY]);
+ if (file->f_file[O_WRONLY])
+ nlmsvc_ops->fclose(file->f_file[O_WRONLY]);
}
/*
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 61b2aae81abb..2acea7792bb2 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -470,6 +470,15 @@ nfsd4_decode_fattr4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen,
return nfserr_bad_xdr;
}
}
+ if (bmval[1] & FATTR4_WORD1_TIME_CREATE) {
+ struct timespec64 ts;
+
+ /* No Linux filesystem supports setting this attribute. */
+ bmval[1] &= ~FATTR4_WORD1_TIME_CREATE;
+ status = nfsd4_decode_nfstime4(argp, &ts);
+ if (status)
+ return status;
+ }
if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) {
u32 set_it;
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 847b482155ae..9a8b09afc173 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -465,7 +465,8 @@ static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval)
(FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL)
#define NFSD_WRITEABLE_ATTRS_WORD1 \
(FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \
- | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
+ | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_CREATE \
+ | FATTR4_WORD1_TIME_MODIFY_SET)
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
#define MAYBE_FATTR4_WORD2_SECURITY_LABEL \
FATTR4_WORD2_SECURITY_LABEL
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 1344f7d475d3..aecda4fc95f5 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -198,6 +198,9 @@ static inline int nilfs_acl_chmod(struct inode *inode)
static inline int nilfs_init_acl(struct inode *inode, struct inode *dir)
{
+ if (S_ISLNK(inode->i_mode))
+ return 0;
+
inode->i_mode &= ~current_umask();
return 0;
}
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index e0a2e0468ee7..1ce5c9698393 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1003,6 +1003,9 @@ ovl_posix_acl_xattr_get(const struct xattr_handler *handler,
struct dentry *dentry, struct inode *inode,
const char *name, void *buffer, size_t size)
{
+ if (!IS_POSIXACL(inode))
+ return -EOPNOTSUPP;
+
return ovl_xattr_get(dentry, inode, handler->name, buffer, size);
}
@@ -1018,6 +1021,9 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
struct posix_acl *acl = NULL;
int err;
+ if (!IS_POSIXACL(inode))
+ return -EOPNOTSUPP;
+
/* Check that everything is OK before copy-up */
if (value) {
acl = posix_acl_from_xattr(&init_user_ns, value, size);
@@ -1960,6 +1966,20 @@ static struct dentry *ovl_get_root(struct super_block *sb,
return root;
}
+static bool ovl_has_idmapped_layers(struct ovl_fs *ofs)
+{
+
+ unsigned int i;
+ const struct vfsmount *mnt;
+
+ for (i = 0; i < ofs->numlayer; i++) {
+ mnt = ofs->layers[i].mnt;
+ if (mnt && is_idmapped_mnt(mnt))
+ return true;
+ }
+ return false;
+}
+
static int ovl_fill_super(struct super_block *sb, void *data, int silent)
{
struct path upperpath = { };
@@ -2129,7 +2149,10 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
sb->s_xattr = ofs->config.userxattr ? ovl_user_xattr_handlers :
ovl_trusted_xattr_handlers;
sb->s_fs_info = ofs;
- sb->s_flags |= SB_POSIXACL;
+ if (ovl_has_idmapped_layers(ofs))
+ pr_warn("POSIX ACLs are not yet supported with idmapped layers, mounting without ACL support.\n");
+ else
+ sb->s_flags |= SB_POSIXACL;
sb->s_iflags |= SB_I_SKIP_SYNC;
err = -ENOMEM;
diff --git a/fs/remap_range.c b/fs/remap_range.c
index e112b5424cdb..5e0d97e02f96 100644
--- a/fs/remap_range.c
+++ b/fs/remap_range.c
@@ -71,7 +71,8 @@ static int generic_remap_checks(struct file *file_in, loff_t pos_in,
* Otherwise, make sure the count is also block-aligned, having
* already confirmed the starting offsets' block alignment.
*/
- if (pos_in + count == size_in) {
+ if (pos_in + count == size_in &&
+ (!(remap_flags & REMAP_FILE_DEDUP) || pos_out + count == size_out)) {
bcount = ALIGN(size_in, bs) - pos_in;
} else {
if (!IS_ALIGNED(count, bs))
@@ -546,7 +547,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
else if (deduped < 0)
info->status = deduped;
else
- info->bytes_deduped = len;
+ info->bytes_deduped = deduped;
next_fdput:
fdput(dst_fd);
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 1bfcfb1af352..d4427d0a0e18 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -264,7 +264,8 @@ struct css_set {
* List of csets participating in the on-going migration either as
* source or destination. Protected by cgroup_mutex.
*/
- struct list_head mg_preload_node;
+ struct list_head mg_src_preload_node;
+ struct list_head mg_dst_preload_node;
struct list_head mg_node;
/*
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 2c7477354744..314802f98b9d 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -68,6 +68,8 @@ extern ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr,
extern ssize_t cpu_show_mmio_stale_data(struct device *dev,
struct device_attribute *attr,
char *buf);
+extern ssize_t cpu_show_retbleed(struct device *dev,
+ struct device_attribute *attr, char *buf);
extern __printf(4, 5)
struct device *cpu_device_create(struct device *parent, void *drvdata,
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 3af34de54330..56d6a0196534 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -149,19 +149,19 @@ static inline void *kmap_local_folio(struct folio *folio, size_t offset);
* It is used in atomic context when code wants to access the contents of a
* page that might be allocated from high memory (see __GFP_HIGHMEM), for
* example a page in the pagecache. The API has two functions, and they
- * can be used in a manner similar to the following:
+ * can be used in a manner similar to the following::
*
- * -- Find the page of interest. --
- * struct page *page = find_get_page(mapping, offset);
+ * // Find the page of interest.
+ * struct page *page = find_get_page(mapping, offset);
*
- * -- Gain access to the contents of that page. --
- * void *vaddr = kmap_atomic(page);
+ * // Gain access to the contents of that page.
+ * void *vaddr = kmap_atomic(page);
*
- * -- Do something to the contents of that page. --
- * memset(vaddr, 0, PAGE_SIZE);
+ * // Do something to the contents of that page.
+ * memset(vaddr, 0, PAGE_SIZE);
*
- * -- Unmap that page. --
- * kunmap_atomic(vaddr);
+ * // Unmap that page.
+ * kunmap_atomic(vaddr);
*
* Note that the kunmap_atomic() call takes the result of the kmap_atomic()
* call, not the argument.
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index ce6536f1d269..475683cd67f1 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -452,6 +452,12 @@ static inline int kexec_crash_loaded(void) { return 0; }
#define kexec_in_progress false
#endif /* CONFIG_KEXEC_CORE */
+#ifdef CONFIG_KEXEC_SIG
+void set_kexec_sig_enforced(void);
+#else
+static inline void set_kexec_sig_enforced(void) {}
+#endif
+
#endif /* !defined(__ASSEBMLY__) */
#endif /* LINUX_KEXEC_H */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c20f2d55840c..83cf7fd842e0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1513,7 +1513,7 @@ static inline void kvm_arch_end_assignment(struct kvm *kvm)
{
}
-static inline bool kvm_arch_has_assigned_device(struct kvm *kvm)
+static __always_inline bool kvm_arch_has_assigned_device(struct kvm *kvm)
{
return false;
}
diff --git a/include/linux/objtool.h b/include/linux/objtool.h
index 15b940ec1eac..10bc88cc3bf6 100644
--- a/include/linux/objtool.h
+++ b/include/linux/objtool.h
@@ -32,11 +32,16 @@ struct unwind_hint {
*
* UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function.
* Useful for code which doesn't have an ELF function annotation.
+ *
+ * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc.
*/
#define UNWIND_HINT_TYPE_CALL 0
#define UNWIND_HINT_TYPE_REGS 1
#define UNWIND_HINT_TYPE_REGS_PARTIAL 2
#define UNWIND_HINT_TYPE_FUNC 3
+#define UNWIND_HINT_TYPE_ENTRY 4
+#define UNWIND_HINT_TYPE_SAVE 5
+#define UNWIND_HINT_TYPE_RESTORE 6
#ifdef CONFIG_OBJTOOL
@@ -124,7 +129,7 @@ struct unwind_hint {
* the debuginfo as necessary. It will also warn if it sees any
* inconsistencies.
*/
-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
+.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
.Lunwind_hint_ip_\@:
.pushsection .discard.unwind_hints
/* struct unwind_hint */
@@ -177,7 +182,7 @@ struct unwind_hint {
#define ASM_REACHABLE
#else
#define ANNOTATE_INTRA_FUNCTION_CALL
-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
+.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
.endm
.macro STACK_FRAME_NON_STANDARD func:req
.endm
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 505aaf9fe477..81cab4b01edc 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -85,7 +85,7 @@ static inline void exit_thread(struct task_struct *tsk)
extern __noreturn void do_group_exit(int);
extern void exit_files(struct task_struct *);
-extern void exit_itimers(struct signal_struct *);
+extern void exit_itimers(struct task_struct *);
extern pid_t kernel_clone(struct kernel_clone_args *kargs);
struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node);
diff --git a/include/trace/events/iocost.h b/include/trace/events/iocost.h
index e282ce02fa2d..6d1626e7a4ce 100644
--- a/include/trace/events/iocost.h
+++ b/include/trace/events/iocost.h
@@ -160,7 +160,7 @@ TRACE_EVENT(iocost_ioc_vrate_adj,
TP_fast_assign(
__assign_str(devname, ioc_name(ioc));
- __entry->old_vrate = atomic64_read(&ioc->vtime_rate);;
+ __entry->old_vrate = atomic64_read(&ioc->vtime_rate);
__entry->new_vrate = new_vrate;
__entry->busy_level = ioc->busy_level;
__entry->read_missed_ppm = missed_ppm[READ];
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 754f3237194a..e1fcaedba4fa 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -64,7 +64,7 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
goto fail_put;
if (!setup_ipc_sysctls(ns))
- goto fail_put;
+ goto fail_mq;
sem_init_ns(ns);
msg_init_ns(ns);
@@ -72,6 +72,9 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
return ns;
+fail_mq:
+ retire_mq_sysctls(ns);
+
fail_put:
put_user_ns(ns->user_ns);
ns_free_inum(&ns->ns);
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 1779ccddb734..13c8e91d7862 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -765,7 +765,8 @@ struct css_set init_css_set = {
.task_iters = LIST_HEAD_INIT(init_css_set.task_iters),
.threaded_csets = LIST_HEAD_INIT(init_css_set.threaded_csets),
.cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links),
- .mg_preload_node = LIST_HEAD_INIT(init_css_set.mg_preload_node),
+ .mg_src_preload_node = LIST_HEAD_INIT(init_css_set.mg_src_preload_node),
+ .mg_dst_preload_node = LIST_HEAD_INIT(init_css_set.mg_dst_preload_node),
.mg_node = LIST_HEAD_INIT(init_css_set.mg_node),
/*
@@ -1240,7 +1241,8 @@ static struct css_set *find_css_set(struct css_set *old_cset,
INIT_LIST_HEAD(&cset->threaded_csets);
INIT_HLIST_NODE(&cset->hlist);
INIT_LIST_HEAD(&cset->cgrp_links);
- INIT_LIST_HEAD(&cset->mg_preload_node);
+ INIT_LIST_HEAD(&cset->mg_src_preload_node);
+ INIT_LIST_HEAD(&cset->mg_dst_preload_node);
INIT_LIST_HEAD(&cset->mg_node);
/* Copy the set of subsystem state objects generated in
@@ -2597,21 +2599,27 @@ int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp)
*/
void cgroup_migrate_finish(struct cgroup_mgctx *mgctx)
{
- LIST_HEAD(preloaded);
struct css_set *cset, *tmp_cset;
lockdep_assert_held(&cgroup_mutex);
spin_lock_irq(&css_set_lock);
- list_splice_tail_init(&mgctx->preloaded_src_csets, &preloaded);
- list_splice_tail_init(&mgctx->preloaded_dst_csets, &preloaded);
+ list_for_each_entry_safe(cset, tmp_cset, &mgctx->preloaded_src_csets,
+ mg_src_preload_node) {
+ cset->mg_src_cgrp = NULL;
+ cset->mg_dst_cgrp = NULL;
+ cset->mg_dst_cset = NULL;
+ list_del_init(&cset->mg_src_preload_node);
+ put_css_set_locked(cset);
+ }
- list_for_each_entry_safe(cset, tmp_cset, &preloaded, mg_preload_node) {
+ list_for_each_entry_safe(cset, tmp_cset, &mgctx->preloaded_dst_csets,
+ mg_dst_preload_node) {
cset->mg_src_cgrp = NULL;
cset->mg_dst_cgrp = NULL;
cset->mg_dst_cset = NULL;
- list_del_init(&cset->mg_preload_node);
+ list_del_init(&cset->mg_dst_preload_node);
put_css_set_locked(cset);
}
@@ -2651,7 +2659,7 @@ void cgroup_migrate_add_src(struct css_set *src_cset,
if (src_cset->dead)
return;
- if (!list_empty(&src_cset->mg_preload_node))
+ if (!list_empty(&src_cset->mg_src_preload_node))
return;
src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root);
@@ -2664,7 +2672,7 @@ void cgroup_migrate_add_src(struct css_set *src_cset,
src_cset->mg_src_cgrp = src_cgrp;
src_cset->mg_dst_cgrp = dst_cgrp;
get_css_set(src_cset);
- list_add_tail(&src_cset->mg_preload_node, &mgctx->preloaded_src_csets);
+ list_add_tail(&src_cset->mg_src_preload_node, &mgctx->preloaded_src_csets);
}
/**
@@ -2689,7 +2697,7 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx)
/* look up the dst cset for each src cset and link it to src */
list_for_each_entry_safe(src_cset, tmp_cset, &mgctx->preloaded_src_csets,
- mg_preload_node) {
+ mg_src_preload_node) {
struct css_set *dst_cset;
struct cgroup_subsys *ss;
int ssid;
@@ -2708,7 +2716,7 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx)
if (src_cset == dst_cset) {
src_cset->mg_src_cgrp = NULL;
src_cset->mg_dst_cgrp = NULL;
- list_del_init(&src_cset->mg_preload_node);
+ list_del_init(&src_cset->mg_src_preload_node);
put_css_set(src_cset);
put_css_set(dst_cset);
continue;
@@ -2716,8 +2724,8 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx)
src_cset->mg_dst_cset = dst_cset;
- if (list_empty(&dst_cset->mg_preload_node))
- list_add_tail(&dst_cset->mg_preload_node,
+ if (list_empty(&dst_cset->mg_dst_preload_node))
+ list_add_tail(&dst_cset->mg_dst_preload_node,
&mgctx->preloaded_dst_csets);
else
put_css_set(dst_cset);
@@ -2963,7 +2971,8 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
goto out_finish;
spin_lock_irq(&css_set_lock);
- list_for_each_entry(src_cset, &mgctx.preloaded_src_csets, mg_preload_node) {
+ list_for_each_entry(src_cset, &mgctx.preloaded_src_csets,
+ mg_src_preload_node) {
struct task_struct *task, *ntask;
/* all tasks in src_csets need to be migrated */
diff --git a/kernel/exit.c b/kernel/exit.c
index f072959fcab7..64c938ce36fe 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -766,7 +766,7 @@ void __noreturn do_exit(long code)
#ifdef CONFIG_POSIX_TIMERS
hrtimer_cancel(&tsk->signal->real_timer);
- exit_itimers(tsk->signal);
+ exit_itimers(tsk);
#endif
if (tsk->mm)
setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm);
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 145321a5e798..f9261c07b048 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -29,6 +29,15 @@
#include <linux/vmalloc.h>
#include "kexec_internal.h"
+#ifdef CONFIG_KEXEC_SIG
+static bool sig_enforce = IS_ENABLED(CONFIG_KEXEC_SIG_FORCE);
+
+void set_kexec_sig_enforced(void)
+{
+ sig_enforce = true;
+}
+#endif
+
static int kexec_calculate_store_digests(struct kimage *image);
/*
@@ -159,7 +168,7 @@ kimage_validate_signature(struct kimage *image)
image->kernel_buf_len);
if (ret) {
- if (IS_ENABLED(CONFIG_KEXEC_SIG_FORCE)) {
+ if (sig_enforce) {
pr_notice("Enforced kernel signature verification failed (%d).\n", ret);
return ret;
}
diff --git a/kernel/module/internal.h b/kernel/module/internal.h
index bc5507ab8450..ec104c2950c3 100644
--- a/kernel/module/internal.h
+++ b/kernel/module/internal.h
@@ -11,6 +11,7 @@
#include <linux/mutex.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
+#include <linux/mm.h>
#ifndef ARCH_SHF_SMALL
#define ARCH_SHF_SMALL 0
@@ -30,11 +31,13 @@
* to ensure complete separation of code and data, but
* only when CONFIG_STRICT_MODULE_RWX=y
*/
-#ifdef CONFIG_STRICT_MODULE_RWX
-# define strict_align(X) PAGE_ALIGN(X)
-#else
-# define strict_align(X) (X)
-#endif
+static inline unsigned int strict_align(unsigned int size)
+{
+ if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
+ return PAGE_ALIGN(size);
+ else
+ return size;
+}
extern struct mutex module_mutex;
extern struct list_head modules;
diff --git a/kernel/module/kallsyms.c b/kernel/module/kallsyms.c
index 3e11523bc6f6..77e75bead569 100644
--- a/kernel/module/kallsyms.c
+++ b/kernel/module/kallsyms.c
@@ -137,6 +137,7 @@ void layout_symtab(struct module *mod, struct load_info *info)
info->symoffs = ALIGN(mod->data_layout.size, symsect->sh_addralign ?: 1);
info->stroffs = mod->data_layout.size = info->symoffs + ndst * sizeof(Elf_Sym);
mod->data_layout.size += strtab_size;
+ /* Note add_kallsyms() computes strtab_size as core_typeoffs - stroffs */
info->core_typeoffs = mod->data_layout.size;
mod->data_layout.size += ndst * sizeof(char);
mod->data_layout.size = strict_align(mod->data_layout.size);
@@ -169,19 +170,20 @@ void add_kallsyms(struct module *mod, const struct load_info *info)
Elf_Sym *dst;
char *s;
Elf_Shdr *symsec = &info->sechdrs[info->index.sym];
+ unsigned long strtab_size;
/* Set up to point into init section. */
mod->kallsyms = (void __rcu *)mod->init_layout.base +
info->mod_kallsyms_init_off;
- preempt_disable();
+ rcu_read_lock();
/* The following is safe since this pointer cannot change */
- rcu_dereference_sched(mod->kallsyms)->symtab = (void *)symsec->sh_addr;
- rcu_dereference_sched(mod->kallsyms)->num_symtab = symsec->sh_size / sizeof(Elf_Sym);
+ rcu_dereference(mod->kallsyms)->symtab = (void *)symsec->sh_addr;
+ rcu_dereference(mod->kallsyms)->num_symtab = symsec->sh_size / sizeof(Elf_Sym);
/* Make sure we get permanent strtab: don't use info->strtab. */
- rcu_dereference_sched(mod->kallsyms)->strtab =
+ rcu_dereference(mod->kallsyms)->strtab =
(void *)info->sechdrs[info->index.str].sh_addr;
- rcu_dereference_sched(mod->kallsyms)->typetab = mod->init_layout.base + info->init_typeoffs;
+ rcu_dereference(mod->kallsyms)->typetab = mod->init_layout.base + info->init_typeoffs;
/*
* Now populate the cut down core kallsyms for after init
@@ -190,22 +192,29 @@ void add_kallsyms(struct module *mod, const struct load_info *info)
mod->core_kallsyms.symtab = dst = mod->data_layout.base + info->symoffs;
mod->core_kallsyms.strtab = s = mod->data_layout.base + info->stroffs;
mod->core_kallsyms.typetab = mod->data_layout.base + info->core_typeoffs;
- src = rcu_dereference_sched(mod->kallsyms)->symtab;
- for (ndst = i = 0; i < rcu_dereference_sched(mod->kallsyms)->num_symtab; i++) {
- rcu_dereference_sched(mod->kallsyms)->typetab[i] = elf_type(src + i, info);
+ strtab_size = info->core_typeoffs - info->stroffs;
+ src = rcu_dereference(mod->kallsyms)->symtab;
+ for (ndst = i = 0; i < rcu_dereference(mod->kallsyms)->num_symtab; i++) {
+ rcu_dereference(mod->kallsyms)->typetab[i] = elf_type(src + i, info);
if (i == 0 || is_livepatch_module(mod) ||
is_core_symbol(src + i, info->sechdrs, info->hdr->e_shnum,
info->index.pcpu)) {
+ ssize_t ret;
+
mod->core_kallsyms.typetab[ndst] =
- rcu_dereference_sched(mod->kallsyms)->typetab[i];
+ rcu_dereference(mod->kallsyms)->typetab[i];
dst[ndst] = src[i];
dst[ndst++].st_name = s - mod->core_kallsyms.strtab;
- s += strscpy(s,
- &rcu_dereference_sched(mod->kallsyms)->strtab[src[i].st_name],
- KSYM_NAME_LEN) + 1;
+ ret = strscpy(s,
+ &rcu_dereference(mod->kallsyms)->strtab[src[i].st_name],
+ strtab_size);
+ if (ret < 0)
+ break;
+ s += ret + 1;
+ strtab_size -= ret + 1;
}
}
- preempt_enable();
+ rcu_read_unlock();
mod->core_kallsyms.num_symtab = ndst;
}
diff --git a/kernel/module/main.c b/kernel/module/main.c
index fed58d30725d..0548151dd933 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -2939,24 +2939,25 @@ static void cfi_init(struct module *mod)
{
#ifdef CONFIG_CFI_CLANG
initcall_t *init;
+#ifdef CONFIG_MODULE_UNLOAD
exitcall_t *exit;
+#endif
rcu_read_lock_sched();
mod->cfi_check = (cfi_check_fn)
find_kallsyms_symbol_value(mod, "__cfi_check");
init = (initcall_t *)
find_kallsyms_symbol_value(mod, "__cfi_jt_init_module");
- exit = (exitcall_t *)
- find_kallsyms_symbol_value(mod, "__cfi_jt_cleanup_module");
- rcu_read_unlock_sched();
-
/* Fix init/exit functions to point to the CFI jump table */
if (init)
mod->init = *init;
#ifdef CONFIG_MODULE_UNLOAD
+ exit = (exitcall_t *)
+ find_kallsyms_symbol_value(mod, "__cfi_jt_cleanup_module");
if (exit)
mod->exit = *exit;
#endif
+ rcu_read_unlock_sched();
cfi_module_add(mod, mod_tree.addr_min);
#endif
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 1cd10b102c51..5dead89308b7 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -1051,15 +1051,24 @@ retry_delete:
}
/*
- * This is called by do_exit or de_thread, only when there are no more
- * references to the shared signal_struct.
+ * This is called by do_exit or de_thread, only when nobody else can
+ * modify the signal->posix_timers list. Yet we need sighand->siglock
+ * to prevent the race with /proc/pid/timers.
*/
-void exit_itimers(struct signal_struct *sig)
+void exit_itimers(struct task_struct *tsk)
{
+ struct list_head timers;
struct k_itimer *tmr;
- while (!list_empty(&sig->posix_timers)) {
- tmr = list_entry(sig->posix_timers.next, struct k_itimer, list);
+ if (list_empty(&tsk->signal->posix_timers))
+ return;
+
+ spin_lock_irq(&tsk->sighand->siglock);
+ list_replace_init(&tsk->signal->posix_timers, &timers);
+ spin_unlock_irq(&tsk->sighand->siglock);
+
+ while (!list_empty(&timers)) {
+ tmr = list_first_entry(&timers, struct k_itimer, list);
itimer_delete(tmr);
}
}
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index debbbb083286..ccd6a5ade3e9 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -194,7 +194,8 @@ config FUNCTION_TRACER
sequence is then dynamically patched into a tracer call when
tracing is enabled by the administrator. If it's runtime disabled
(the bootup default), then the overhead of the instructions is very
- small and not measurable even in micro-benchmarks.
+ small and not measurable even in micro-benchmarks (at least on
+ x86, but may have impact on other architectures).
config FUNCTION_GRAPH_TRACER
bool "Kernel Function Graph Tracer"
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a8cfac0611bc..b8dd54627075 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -9864,6 +9864,12 @@ void trace_init_global_iter(struct trace_iterator *iter)
/* Output in nanoseconds only if we are using a clock in nanoseconds. */
if (trace_clocks[iter->tr->clock_id].in_ns)
iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
+
+ /* Can not use kmalloc for iter.temp and iter.fmt */
+ iter->temp = static_temp_buf;
+ iter->temp_size = STATIC_TEMP_BUF_SIZE;
+ iter->fmt = static_fmt_buf;
+ iter->fmt_size = STATIC_FMT_BUF_SIZE;
}
void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
@@ -9896,11 +9902,6 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
/* Simulate the iterator */
trace_init_global_iter(&iter);
- /* Can not use kmalloc for iter.temp and iter.fmt */
- iter.temp = static_temp_buf;
- iter.temp_size = STATIC_TEMP_BUF_SIZE;
- iter.fmt = static_fmt_buf;
- iter.fmt_size = STATIC_FMT_BUF_SIZE;
for_each_tracing_cpu(cpu) {
atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 48e82e141d54..e87a46794079 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -4430,6 +4430,8 @@ static int parse_var_defs(struct hist_trigger_data *hist_data)
s = kstrdup(field_str, GFP_KERNEL);
if (!s) {
+ kfree(hist_data->attrs->var_defs.name[n_vars]);
+ hist_data->attrs->var_defs.name[n_vars] = NULL;
ret = -ENOMEM;
goto free;
}
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index 59e1653799f8..3c7b9d6dca95 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -336,8 +336,7 @@ static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm,
if (pte_young(entry)) {
referenced = true;
entry = pte_mkold(entry);
- huge_ptep_set_access_flags(vma, addr, pte, entry,
- vma->vm_flags & VM_WRITE);
+ set_huge_pte_at(mm, addr, pte, entry);
}
#ifdef CONFIG_MMU_NOTIFIER
diff --git a/mm/memory.c b/mm/memory.c
index 7a089145cad4..4cf7d4b6c950 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4802,6 +4802,19 @@ static vm_fault_t create_huge_pud(struct vm_fault *vmf)
defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
/* No support for anonymous transparent PUD pages yet */
if (vma_is_anonymous(vmf->vma))
+ return VM_FAULT_FALLBACK;
+ if (vmf->vma->vm_ops->huge_fault)
+ return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD);
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+ return VM_FAULT_FALLBACK;
+}
+
+static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
+{
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
+ defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
+ /* No support for anonymous transparent PUD pages yet */
+ if (vma_is_anonymous(vmf->vma))
goto split;
if (vmf->vma->vm_ops->huge_fault) {
vm_fault_t ret = vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD);
@@ -4812,19 +4825,7 @@ static vm_fault_t create_huge_pud(struct vm_fault *vmf)
split:
/* COW or write-notify not handled on PUD level: split pud.*/
__split_huge_pud(vmf->vma, vmf->pud, vmf->address);
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
- return VM_FAULT_FALLBACK;
-}
-
-static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
-{
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- /* No support for anonymous transparent PUD pages yet */
- if (vma_is_anonymous(vmf->vma))
- return VM_FAULT_FALLBACK;
- if (vmf->vma->vm_ops->huge_fault)
- return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD);
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
return VM_FAULT_FALLBACK;
}
diff --git a/mm/rmap.c b/mm/rmap.c
index 5bcb334cd6f2..746c05acad27 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1899,8 +1899,23 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
/* Unexpected PMD-mapped THP? */
VM_BUG_ON_FOLIO(!pvmw.pte, folio);
- subpage = folio_page(folio,
- pte_pfn(*pvmw.pte) - folio_pfn(folio));
+ if (folio_is_zone_device(folio)) {
+ /*
+ * Our PTE is a non-present device exclusive entry and
+ * calculating the subpage as for the common case would
+ * result in an invalid pointer.
+ *
+ * Since only PAGE_SIZE pages can currently be
+ * migrated, just set it to page. This will need to be
+ * changed when hugepage migrations to device private
+ * memory are supported.
+ */
+ VM_BUG_ON_FOLIO(folio_nr_pages(folio) > 1, folio);
+ subpage = &folio->page;
+ } else {
+ subpage = folio_page(folio,
+ pte_pfn(*pvmw.pte) - folio_pfn(folio));
+ }
address = pvmw.address;
anon_exclusive = folio_test_anon(folio) &&
PageAnonExclusive(subpage);
@@ -1993,15 +2008,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
/*
* No need to invalidate here it will synchronize on
* against the special swap migration pte.
- *
- * The assignment to subpage above was computed from a
- * swap PTE which results in an invalid pointer.
- * Since only PAGE_SIZE pages can currently be
- * migrated, just set it to page. This will need to be
- * changed when hugepage migrations to device private
- * memory are supported.
*/
- subpage = &folio->page;
} else if (PageHWPoison(subpage)) {
pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
if (folio_test_hugetlb(folio)) {
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index f4fa61dbbee3..dbbd1a7e65f3 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -78,6 +78,14 @@ static int __split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start)
spin_lock(&init_mm.page_table_lock);
if (likely(pmd_leaf(*pmd))) {
+ /*
+ * Higher order allocations from buddy allocator must be able to
+ * be treated as indepdenent small pages (as they can be freed
+ * individually).
+ */
+ if (!PageReserved(page))
+ split_page(page, get_order(PMD_SIZE));
+
/* Make pte visible before pmd. See comment in pmd_install(). */
smp_wmb();
pmd_populate_kernel(&init_mm, pmd, pgtable);
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 4f4892a5f767..07d3befc80e4 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -246,7 +246,10 @@ static int mcontinue_atomic_pte(struct mm_struct *dst_mm,
struct page *page;
int ret;
- ret = shmem_getpage(inode, pgoff, &page, SGP_READ);
+ ret = shmem_getpage(inode, pgoff, &page, SGP_NOALLOC);
+ /* Our caller expects us to return -EFAULT if we failed to find page. */
+ if (ret == -ENOENT)
+ ret = -EFAULT;
if (ret)
goto out;
if (!page) {
diff --git a/samples/fprobe/fprobe_example.c b/samples/fprobe/fprobe_example.c
index 18b1e5c4b431..e22da8573116 100644
--- a/samples/fprobe/fprobe_example.c
+++ b/samples/fprobe/fprobe_example.c
@@ -20,7 +20,7 @@
#define BACKTRACE_DEPTH 16
#define MAX_SYMBOL_LEN 4096
-struct fprobe sample_probe;
+static struct fprobe sample_probe;
static unsigned long nhit;
static char symbol[MAX_SYMBOL_LEN] = "kernel_clone";
diff --git a/samples/kprobes/kprobe_example.c b/samples/kprobes/kprobe_example.c
index f991a66b5b02..fd346f58ddba 100644
--- a/samples/kprobes/kprobe_example.c
+++ b/samples/kprobes/kprobe_example.c
@@ -16,9 +16,8 @@
#include <linux/module.h>
#include <linux/kprobes.h>
-#define MAX_SYMBOL_LEN 64
-static char symbol[MAX_SYMBOL_LEN] = "kernel_clone";
-module_param_string(symbol, symbol, sizeof(symbol), 0644);
+static char symbol[KSYM_NAME_LEN] = "kernel_clone";
+module_param_string(symbol, symbol, KSYM_NAME_LEN, 0644);
/* For each probe you need to allocate a kprobe structure */
static struct kprobe kp = {
diff --git a/samples/kprobes/kretprobe_example.c b/samples/kprobes/kretprobe_example.c
index 228321ecb161..cbf16542d84e 100644
--- a/samples/kprobes/kretprobe_example.c
+++ b/samples/kprobes/kretprobe_example.c
@@ -23,11 +23,10 @@
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/ktime.h>
-#include <linux/limits.h>
#include <linux/sched.h>
-static char func_name[NAME_MAX] = "kernel_clone";
-module_param_string(func, func_name, NAME_MAX, S_IRUGO);
+static char func_name[KSYM_NAME_LEN] = "kernel_clone";
+module_param_string(func, func_name, KSYM_NAME_LEN, 0644);
MODULE_PARM_DESC(func, "Function to kretprobe; this module will report the"
" function's execution time");
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index d1425778664b..3fb6a99e78c4 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -236,6 +236,7 @@ objtool_args = \
$(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount) \
$(if $(CONFIG_UNWINDER_ORC), --orc) \
$(if $(CONFIG_RETPOLINE), --retpoline) \
+ $(if $(CONFIG_RETHUNK), --rethunk) \
$(if $(CONFIG_SLS), --sls) \
$(if $(CONFIG_STACK_VALIDATION), --stackval) \
$(if $(CONFIG_HAVE_STATIC_CALL_INLINE), --static-call) \
diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o
index 3c97a1564947..84019814f33f 100644
--- a/scripts/Makefile.vmlinux_o
+++ b/scripts/Makefile.vmlinux_o
@@ -44,7 +44,7 @@ objtool-enabled := $(or $(delay-objtool),$(CONFIG_NOINSTR_VALIDATION))
objtool_args := \
$(if $(delay-objtool),$(objtool_args)) \
- $(if $(CONFIG_NOINSTR_VALIDATION), --noinstr) \
+ $(if $(CONFIG_NOINSTR_VALIDATION), --noinstr $(if $(CONFIG_CPU_UNRET_ENTRY), --unret)) \
$(if $(CONFIG_GCOV_KERNEL), --no-unreachable) \
--link
diff --git a/security/Kconfig b/security/Kconfig
index f29e4c656983..e6db09a779b7 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -54,17 +54,6 @@ config SECURITY_NETWORK
implement socket and networking access controls.
If you are unsure how to answer this question, answer N.
-config PAGE_TABLE_ISOLATION
- bool "Remove the kernel mapping in user mode"
- default y
- depends on (X86_64 || X86_PAE) && !UML
- help
- This feature reduces the number of hardware side channels by
- ensuring that the majority of kernel addresses are not mapped
- into userspace.
-
- See Documentation/x86/pti.rst for more details.
-
config SECURITY_INFINIBAND
bool "Infiniband Security Hooks"
depends on SECURITY && INFINIBAND
diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c
index a733aff02006..708de9656bbd 100644
--- a/security/integrity/evm/evm_crypto.c
+++ b/security/integrity/evm/evm_crypto.c
@@ -75,7 +75,7 @@ static struct shash_desc *init_desc(char type, uint8_t hash_algo)
{
long rc;
const char *algo;
- struct crypto_shash **tfm, *tmp_tfm = NULL;
+ struct crypto_shash **tfm, *tmp_tfm;
struct shash_desc *desc;
if (type == EVM_XATTR_HMAC) {
@@ -120,16 +120,13 @@ unlock:
alloc:
desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(*tfm),
GFP_KERNEL);
- if (!desc) {
- crypto_free_shash(tmp_tfm);
+ if (!desc)
return ERR_PTR(-ENOMEM);
- }
desc->tfm = *tfm;
rc = crypto_shash_init(desc);
if (rc) {
- crypto_free_shash(tmp_tfm);
kfree(desc);
return ERR_PTR(rc);
}
diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c
index cdb84dccd24e..bde74fcecee3 100644
--- a/security/integrity/ima/ima_appraise.c
+++ b/security/integrity/ima/ima_appraise.c
@@ -514,7 +514,8 @@ int ima_appraise_measurement(enum ima_hooks func,
goto out;
}
- status = evm_verifyxattr(dentry, XATTR_NAME_IMA, xattr_value, rc, iint);
+ status = evm_verifyxattr(dentry, XATTR_NAME_IMA, xattr_value,
+ rc < 0 ? 0 : rc, iint);
switch (status) {
case INTEGRITY_PASS:
case INTEGRITY_PASS_IMMUTABLE:
diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c
index a7206cc1d7d1..64499056648a 100644
--- a/security/integrity/ima/ima_crypto.c
+++ b/security/integrity/ima/ima_crypto.c
@@ -205,6 +205,7 @@ out_array:
crypto_free_shash(ima_algo_array[i].tfm);
}
+ kfree(ima_algo_array);
out:
crypto_free_shash(ima_shash_tfm);
return rc;
diff --git a/security/integrity/ima/ima_efi.c b/security/integrity/ima/ima_efi.c
index 71786d01946f..9db66fe310d4 100644
--- a/security/integrity/ima/ima_efi.c
+++ b/security/integrity/ima/ima_efi.c
@@ -67,6 +67,8 @@ const char * const *arch_get_ima_policy(void)
if (IS_ENABLED(CONFIG_IMA_ARCH_POLICY) && arch_ima_get_secureboot()) {
if (IS_ENABLED(CONFIG_MODULE_SIG))
set_module_sig_enforced();
+ if (IS_ENABLED(CONFIG_KEXEC_SIG))
+ set_kexec_sig_enforced();
return sb_arch_rules;
}
return NULL;
diff --git a/security/integrity/ima/ima_template_lib.c b/security/integrity/ima/ima_template_lib.c
index c877f01a5471..7bf9b1507220 100644
--- a/security/integrity/ima/ima_template_lib.c
+++ b/security/integrity/ima/ima_template_lib.c
@@ -323,10 +323,10 @@ static int ima_eventdigest_init_common(const u8 *digest, u32 digestsize,
else
/*
* If digest is NULL, the event being recorded is a violation.
- * Make room for the digest by increasing the offset of
- * IMA_DIGEST_SIZE.
+ * Make room for the digest by increasing the offset by the
+ * hash algorithm digest size.
*/
- offset += IMA_DIGEST_SIZE;
+ offset += hash_digest_size[hash_algo];
return ima_write_template_field_data(buffer, offset + digestsize,
fmt, field_data);
diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index 3e541a4c0423..83ae21a01bbf 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -944,6 +944,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = {
SND_PCI_QUIRK(0x103c, 0x828c, "HP EliteBook 840 G4", CXT_FIXUP_HP_DOCK),
SND_PCI_QUIRK(0x103c, 0x8299, "HP 800 G3 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x103c, 0x829a, "HP 800 G3 DM", CXT_FIXUP_HP_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x103c, 0x82b4, "HP ProDesk 600 G3", CXT_FIXUP_HP_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x103c, 0x836e, "HP ProBook 455 G5", CXT_FIXUP_MUTE_LED_GPIO),
SND_PCI_QUIRK(0x103c, 0x837f, "HP ProBook 470 G5", CXT_FIXUP_MUTE_LED_GPIO),
SND_PCI_QUIRK(0x103c, 0x83b2, "HP EliteBook 840 G5", CXT_FIXUP_HP_DOCK),
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 007dd8b5e1f2..2f55bc43bfa9 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6901,6 +6901,7 @@ enum {
ALC298_FIXUP_LENOVO_SPK_VOLUME,
ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER,
ALC269_FIXUP_ATIV_BOOK_8,
+ ALC221_FIXUP_HP_288PRO_MIC_NO_PRESENCE,
ALC221_FIXUP_HP_MIC_NO_PRESENCE,
ALC256_FIXUP_ASUS_HEADSET_MODE,
ALC256_FIXUP_ASUS_MIC,
@@ -7837,6 +7838,16 @@ static const struct hda_fixup alc269_fixups[] = {
.chained = true,
.chain_id = ALC269_FIXUP_NO_SHUTUP
},
+ [ALC221_FIXUP_HP_288PRO_MIC_NO_PRESENCE] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x19, 0x01a1913c }, /* use as headset mic, without its own jack detect */
+ { 0x1a, 0x01813030 }, /* use as headphone mic, without its own jack detect */
+ { }
+ },
+ .chained = true,
+ .chain_id = ALC269_FIXUP_HEADSET_MODE
+ },
[ALC221_FIXUP_HP_MIC_NO_PRESENCE] = {
.type = HDA_FIXUP_PINS,
.v.pins = (const struct hda_pintbl[]) {
@@ -8886,6 +8897,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
SND_PCI_QUIRK(0x1025, 0x129c, "Acer SWIFT SF314-55", ALC256_FIXUP_ACER_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1025, 0x129d, "Acer SWIFT SF313-51", ALC256_FIXUP_ACER_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1025, 0x1300, "Acer SWIFT SF314-56", ALC256_FIXUP_ACER_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1025, 0x1308, "Acer Aspire Z24-890", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
SND_PCI_QUIRK(0x1025, 0x132a, "Acer TravelMate B114-21", ALC233_FIXUP_ACER_HEADSET_MIC),
@@ -8895,6 +8907,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1025, 0x1430, "Acer TravelMate B311R-31", ALC256_FIXUP_ACER_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1025, 0x1466, "Acer Aspire A515-56", ALC255_FIXUP_ACER_HEADPHONE_AND_MIC),
SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z),
+ SND_PCI_QUIRK(0x1028, 0x053c, "Dell Latitude E5430", ALC292_FIXUP_DELL_E7X),
SND_PCI_QUIRK(0x1028, 0x054b, "Dell XPS one 2710", ALC275_FIXUP_DELL_XPS),
SND_PCI_QUIRK(0x1028, 0x05bd, "Dell Latitude E6440", ALC292_FIXUP_DELL_E7X),
SND_PCI_QUIRK(0x1028, 0x05be, "Dell Latitude E6540", ALC292_FIXUP_DELL_E7X),
@@ -9010,6 +9023,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x2335, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
SND_PCI_QUIRK(0x103c, 0x2336, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
SND_PCI_QUIRK(0x103c, 0x2337, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+ SND_PCI_QUIRK(0x103c, 0x2b5e, "HP 288 Pro G2 MT", ALC221_FIXUP_HP_288PRO_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x103c, 0x802e, "HP Z240 SFF", ALC221_FIXUP_HP_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x103c, 0x802f, "HP Z240", ALC221_FIXUP_HP_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x103c, 0x8077, "HP", ALC256_FIXUP_HP_HEADSET_MIC),
@@ -9096,6 +9110,10 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x89c6, "Zbook Fury 17 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x89ca, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
SND_PCI_QUIRK(0x103c, 0x8a78, "HP Dev One", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST),
+ SND_PCI_QUIRK(0x103c, 0x8aa0, "HP ProBook 440 G9 (MB 8A9E)", ALC236_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8aa3, "HP ProBook 450 G9 (MB 8AA1)", ALC236_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8aa8, "HP EliteBook 640 G9 (MB 8AA6)", ALC236_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8aab, "HP EliteBook 650 G9 (MB 8AA9)", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
@@ -9355,6 +9373,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC),
SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1d72, 0x1945, "Redmi G", ALC256_FIXUP_ASUS_HEADSET_MIC),
SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC),
SND_PCI_QUIRK(0x8086, 0x2074, "Intel NUC 8", ALC233_FIXUP_INTEL_NUC8_DMIC),
SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED),
@@ -11217,6 +11236,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800),
SND_PCI_QUIRK(0x103c, 0x8719, "HP", ALC897_FIXUP_HP_HSMIC_VERB),
SND_PCI_QUIRK(0x103c, 0x873e, "HP", ALC671_FIXUP_HP_HEADSET_MIC2),
+ SND_PCI_QUIRK(0x103c, 0x877e, "HP 288 Pro G6", ALC671_FIXUP_HP_HEADSET_MIC2),
SND_PCI_QUIRK(0x103c, 0x885f, "HP 288 Pro G8", ALC671_FIXUP_HP_HEADSET_MIC2),
SND_PCI_QUIRK(0x1043, 0x1080, "Asus UX501VW", ALC668_FIXUP_HEADSET_MODE),
SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_ASUS_Nx50),
diff --git a/sound/soc/codecs/arizona.c b/sound/soc/codecs/arizona.c
index e32871b3f68a..7434aeeda292 100644
--- a/sound/soc/codecs/arizona.c
+++ b/sound/soc/codecs/arizona.c
@@ -1760,8 +1760,8 @@ static bool arizona_aif_cfg_changed(struct snd_soc_component *component,
if (bclk != (val & ARIZONA_AIF1_BCLK_FREQ_MASK))
return true;
- val = snd_soc_component_read(component, base + ARIZONA_AIF_TX_BCLK_RATE);
- if (lrclk != (val & ARIZONA_AIF1TX_BCPF_MASK))
+ val = snd_soc_component_read(component, base + ARIZONA_AIF_RX_BCLK_RATE);
+ if (lrclk != (val & ARIZONA_AIF1RX_BCPF_MASK))
return true;
val = snd_soc_component_read(component, base + ARIZONA_AIF_FRAME_CTRL_1);
diff --git a/sound/soc/codecs/cs47l92.c b/sound/soc/codecs/cs47l92.c
index a1b8dcdb9f7b..444026b7d54b 100644
--- a/sound/soc/codecs/cs47l92.c
+++ b/sound/soc/codecs/cs47l92.c
@@ -119,7 +119,13 @@ static int cs47l92_put_demux(struct snd_kcontrol *kcontrol,
end:
snd_soc_dapm_mutex_unlock(dapm);
- return snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL);
+ ret = snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL);
+ if (ret < 0) {
+ dev_err(madera->dev, "Failed to update demux power state: %d\n", ret);
+ return ret;
+ }
+
+ return change;
}
static SOC_ENUM_SINGLE_DECL(cs47l92_outdemux_enum,
diff --git a/sound/soc/codecs/max98396.c b/sound/soc/codecs/max98396.c
index 56eb62bb041f..34db38812807 100644
--- a/sound/soc/codecs/max98396.c
+++ b/sound/soc/codecs/max98396.c
@@ -342,12 +342,15 @@ static int max98396_dai_set_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt)
{
struct snd_soc_component *component = codec_dai->component;
struct max98396_priv *max98396 = snd_soc_component_get_drvdata(component);
- unsigned int format = 0;
+ unsigned int format_mask, format = 0;
unsigned int bclk_pol = 0;
int ret, status;
int reg;
bool update = false;
+ format_mask = MAX98396_PCM_MODE_CFG_FORMAT_MASK |
+ MAX98396_PCM_MODE_CFG_LRCLKEDGE;
+
dev_dbg(component->dev, "%s: fmt 0x%08X\n", __func__, fmt);
switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
@@ -395,7 +398,7 @@ static int max98396_dai_set_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt)
ret = regmap_read(max98396->regmap, MAX98396_R2041_PCM_MODE_CFG, &reg);
if (ret < 0)
return -EINVAL;
- if (format != (reg & MAX98396_PCM_BCLKEDGE_BSEL_MASK)) {
+ if (format != (reg & format_mask)) {
update = true;
} else {
ret = regmap_read(max98396->regmap,
@@ -412,8 +415,7 @@ static int max98396_dai_set_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt)
regmap_update_bits(max98396->regmap,
MAX98396_R2041_PCM_MODE_CFG,
- MAX98396_PCM_BCLKEDGE_BSEL_MASK,
- format);
+ format_mask, format);
regmap_update_bits(max98396->regmap,
MAX98396_R2042_PCM_CLK_SETUP,
diff --git a/sound/soc/codecs/rt5640.c b/sound/soc/codecs/rt5640.c
index 69c80d80ed9d..18b3da9211e3 100644
--- a/sound/soc/codecs/rt5640.c
+++ b/sound/soc/codecs/rt5640.c
@@ -1984,7 +1984,12 @@ static int rt5640_set_bias_level(struct snd_soc_component *component,
snd_soc_component_write(component, RT5640_PWR_DIG2, 0x0000);
snd_soc_component_write(component, RT5640_PWR_VOL, 0x0000);
snd_soc_component_write(component, RT5640_PWR_MIXER, 0x0000);
- snd_soc_component_write(component, RT5640_PWR_ANLG1, 0x0000);
+ if (rt5640->jd_src == RT5640_JD_SRC_HDA_HEADER)
+ snd_soc_component_write(component, RT5640_PWR_ANLG1,
+ 0x0018);
+ else
+ snd_soc_component_write(component, RT5640_PWR_ANLG1,
+ 0x0000);
snd_soc_component_write(component, RT5640_PWR_ANLG2, 0x0000);
break;
@@ -2393,9 +2398,15 @@ static void rt5640_jack_work(struct work_struct *work)
static irqreturn_t rt5640_irq(int irq, void *data)
{
struct rt5640_priv *rt5640 = data;
+ int delay = 0;
+
+ if (rt5640->jd_src == RT5640_JD_SRC_HDA_HEADER) {
+ cancel_delayed_work_sync(&rt5640->jack_work);
+ delay = 100;
+ }
if (rt5640->jack)
- queue_delayed_work(system_long_wq, &rt5640->jack_work, 0);
+ queue_delayed_work(system_long_wq, &rt5640->jack_work, delay);
return IRQ_HANDLED;
}
@@ -2580,6 +2591,12 @@ static void rt5640_enable_hda_jack_detect(
snd_soc_component_update_bits(component, RT5640_DUMMY1, 0x400, 0x0);
+ snd_soc_component_update_bits(component, RT5640_PWR_ANLG1,
+ RT5640_PWR_VREF2, RT5640_PWR_VREF2);
+ usleep_range(10000, 15000);
+ snd_soc_component_update_bits(component, RT5640_PWR_ANLG1,
+ RT5640_PWR_FV2, RT5640_PWR_FV2);
+
rt5640->jack = jack;
ret = request_irq(rt5640->irq, rt5640_irq,
@@ -2696,16 +2713,13 @@ static int rt5640_probe(struct snd_soc_component *component)
if (device_property_read_u32(component->dev,
"realtek,jack-detect-source", &val) == 0) {
- if (val <= RT5640_JD_SRC_GPIO4) {
+ if (val <= RT5640_JD_SRC_GPIO4)
rt5640->jd_src = val << RT5640_JD_SFT;
- } else if (val == RT5640_JD_SRC_HDA_HEADER) {
+ else if (val == RT5640_JD_SRC_HDA_HEADER)
rt5640->jd_src = RT5640_JD_SRC_HDA_HEADER;
- snd_soc_component_update_bits(component, RT5640_DUMMY1,
- 0x0300, 0x0);
- } else {
+ else
dev_warn(component->dev, "Warning: Invalid jack-detect-source value: %d, leaving jack-detect disabled\n",
val);
- }
}
if (!device_property_read_bool(component->dev, "realtek,jack-detect-not-inverted"))
diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c
index 2aa48aef6a97..3363d1696ad7 100644
--- a/sound/soc/codecs/sgtl5000.c
+++ b/sound/soc/codecs/sgtl5000.c
@@ -1795,6 +1795,9 @@ static int sgtl5000_i2c_remove(struct i2c_client *client)
{
struct sgtl5000_priv *sgtl5000 = i2c_get_clientdata(client);
+ regmap_write(sgtl5000->regmap, SGTL5000_CHIP_DIG_POWER, SGTL5000_DIG_POWER_DEFAULT);
+ regmap_write(sgtl5000->regmap, SGTL5000_CHIP_ANA_POWER, SGTL5000_ANA_POWER_DEFAULT);
+
clk_disable_unprepare(sgtl5000->mclk);
regulator_bulk_disable(sgtl5000->num_supplies, sgtl5000->supplies);
regulator_bulk_free(sgtl5000->num_supplies, sgtl5000->supplies);
@@ -1802,6 +1805,11 @@ static int sgtl5000_i2c_remove(struct i2c_client *client)
return 0;
}
+static void sgtl5000_i2c_shutdown(struct i2c_client *client)
+{
+ sgtl5000_i2c_remove(client);
+}
+
static const struct i2c_device_id sgtl5000_id[] = {
{"sgtl5000", 0},
{},
@@ -1822,6 +1830,7 @@ static struct i2c_driver sgtl5000_i2c_driver = {
},
.probe_new = sgtl5000_i2c_probe,
.remove = sgtl5000_i2c_remove,
+ .shutdown = sgtl5000_i2c_shutdown,
.id_table = sgtl5000_id,
};
diff --git a/sound/soc/codecs/sgtl5000.h b/sound/soc/codecs/sgtl5000.h
index 56ec5863f250..3a808c762299 100644
--- a/sound/soc/codecs/sgtl5000.h
+++ b/sound/soc/codecs/sgtl5000.h
@@ -80,6 +80,7 @@
/*
* SGTL5000_CHIP_DIG_POWER
*/
+#define SGTL5000_DIG_POWER_DEFAULT 0x0000
#define SGTL5000_ADC_EN 0x0040
#define SGTL5000_DAC_EN 0x0020
#define SGTL5000_DAP_POWERUP 0x0010
diff --git a/sound/soc/codecs/tas2764.c b/sound/soc/codecs/tas2764.c
index d395feffb30b..4cb788f3e5f7 100644
--- a/sound/soc/codecs/tas2764.c
+++ b/sound/soc/codecs/tas2764.c
@@ -42,10 +42,12 @@ static void tas2764_reset(struct tas2764_priv *tas2764)
gpiod_set_value_cansleep(tas2764->reset_gpio, 0);
msleep(20);
gpiod_set_value_cansleep(tas2764->reset_gpio, 1);
+ usleep_range(1000, 2000);
}
snd_soc_component_write(tas2764->component, TAS2764_SW_RST,
TAS2764_RST);
+ usleep_range(1000, 2000);
}
static int tas2764_set_bias_level(struct snd_soc_component *component,
@@ -107,8 +109,10 @@ static int tas2764_codec_resume(struct snd_soc_component *component)
struct tas2764_priv *tas2764 = snd_soc_component_get_drvdata(component);
int ret;
- if (tas2764->sdz_gpio)
+ if (tas2764->sdz_gpio) {
gpiod_set_value_cansleep(tas2764->sdz_gpio, 1);
+ usleep_range(1000, 2000);
+ }
ret = snd_soc_component_update_bits(component, TAS2764_PWR_CTRL,
TAS2764_PWR_CTRL_MASK,
@@ -131,7 +135,8 @@ static const char * const tas2764_ASI1_src[] = {
};
static SOC_ENUM_SINGLE_DECL(
- tas2764_ASI1_src_enum, TAS2764_TDM_CFG2, 4, tas2764_ASI1_src);
+ tas2764_ASI1_src_enum, TAS2764_TDM_CFG2, TAS2764_TDM_CFG2_SCFG_SHIFT,
+ tas2764_ASI1_src);
static const struct snd_kcontrol_new tas2764_asi1_mux =
SOC_DAPM_ENUM("ASI1 Source", tas2764_ASI1_src_enum);
@@ -329,20 +334,22 @@ static int tas2764_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
{
struct snd_soc_component *component = dai->component;
struct tas2764_priv *tas2764 = snd_soc_component_get_drvdata(component);
- u8 tdm_rx_start_slot = 0, asi_cfg_1 = 0;
- int iface;
+ u8 tdm_rx_start_slot = 0, asi_cfg_0 = 0, asi_cfg_1 = 0;
int ret;
switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+ case SND_SOC_DAIFMT_NB_IF:
+ asi_cfg_0 ^= TAS2764_TDM_CFG0_FRAME_START;
+ fallthrough;
case SND_SOC_DAIFMT_NB_NF:
asi_cfg_1 = TAS2764_TDM_CFG1_RX_RISING;
break;
+ case SND_SOC_DAIFMT_IB_IF:
+ asi_cfg_0 ^= TAS2764_TDM_CFG0_FRAME_START;
+ fallthrough;
case SND_SOC_DAIFMT_IB_NF:
asi_cfg_1 = TAS2764_TDM_CFG1_RX_FALLING;
break;
- default:
- dev_err(tas2764->dev, "ASI format Inverse is not found\n");
- return -EINVAL;
}
ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG1,
@@ -353,13 +360,13 @@ static int tas2764_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
case SND_SOC_DAIFMT_I2S:
+ asi_cfg_0 ^= TAS2764_TDM_CFG0_FRAME_START;
+ fallthrough;
case SND_SOC_DAIFMT_DSP_A:
- iface = TAS2764_TDM_CFG2_SCFG_I2S;
tdm_rx_start_slot = 1;
break;
case SND_SOC_DAIFMT_DSP_B:
case SND_SOC_DAIFMT_LEFT_J:
- iface = TAS2764_TDM_CFG2_SCFG_LEFT_J;
tdm_rx_start_slot = 0;
break;
default:
@@ -368,14 +375,15 @@ static int tas2764_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
return -EINVAL;
}
- ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG1,
- TAS2764_TDM_CFG1_MASK,
- (tdm_rx_start_slot << TAS2764_TDM_CFG1_51_SHIFT));
+ ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG0,
+ TAS2764_TDM_CFG0_FRAME_START,
+ asi_cfg_0);
if (ret < 0)
return ret;
- ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG2,
- TAS2764_TDM_CFG2_SCFG_MASK, iface);
+ ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG1,
+ TAS2764_TDM_CFG1_MASK,
+ (tdm_rx_start_slot << TAS2764_TDM_CFG1_51_SHIFT));
if (ret < 0)
return ret;
@@ -501,8 +509,10 @@ static int tas2764_codec_probe(struct snd_soc_component *component)
tas2764->component = component;
- if (tas2764->sdz_gpio)
+ if (tas2764->sdz_gpio) {
gpiod_set_value_cansleep(tas2764->sdz_gpio, 1);
+ usleep_range(1000, 2000);
+ }
tas2764_reset(tas2764);
@@ -526,12 +536,12 @@ static int tas2764_codec_probe(struct snd_soc_component *component)
}
static DECLARE_TLV_DB_SCALE(tas2764_digital_tlv, 1100, 50, 0);
-static DECLARE_TLV_DB_SCALE(tas2764_playback_volume, -10000, 50, 0);
+static DECLARE_TLV_DB_SCALE(tas2764_playback_volume, -10050, 50, 1);
static const struct snd_kcontrol_new tas2764_snd_controls[] = {
SOC_SINGLE_TLV("Speaker Volume", TAS2764_DVC, 0,
TAS2764_DVC_MAX, 1, tas2764_playback_volume),
- SOC_SINGLE_TLV("Amp Gain Volume", TAS2764_CHNL_0, 0, 0x14, 0,
+ SOC_SINGLE_TLV("Amp Gain Volume", TAS2764_CHNL_0, 1, 0x14, 0,
tas2764_digital_tlv),
};
@@ -556,7 +566,7 @@ static const struct reg_default tas2764_reg_defaults[] = {
{ TAS2764_SW_RST, 0x00 },
{ TAS2764_PWR_CTRL, 0x1a },
{ TAS2764_DVC, 0x00 },
- { TAS2764_CHNL_0, 0x00 },
+ { TAS2764_CHNL_0, 0x28 },
{ TAS2764_TDM_CFG0, 0x09 },
{ TAS2764_TDM_CFG1, 0x02 },
{ TAS2764_TDM_CFG2, 0x0a },
diff --git a/sound/soc/codecs/tas2764.h b/sound/soc/codecs/tas2764.h
index 67d6fd903c42..f015f22a083b 100644
--- a/sound/soc/codecs/tas2764.h
+++ b/sound/soc/codecs/tas2764.h
@@ -47,6 +47,7 @@
#define TAS2764_TDM_CFG0_MASK GENMASK(3, 1)
#define TAS2764_TDM_CFG0_44_1_48KHZ BIT(3)
#define TAS2764_TDM_CFG0_88_2_96KHZ (BIT(3) | BIT(1))
+#define TAS2764_TDM_CFG0_FRAME_START BIT(0)
/* TDM Configuration Reg1 */
#define TAS2764_TDM_CFG1 TAS2764_REG(0X0, 0x09)
@@ -66,10 +67,7 @@
#define TAS2764_TDM_CFG2_RXS_16BITS 0x0
#define TAS2764_TDM_CFG2_RXS_24BITS BIT(0)
#define TAS2764_TDM_CFG2_RXS_32BITS BIT(1)
-#define TAS2764_TDM_CFG2_SCFG_MASK GENMASK(5, 4)
-#define TAS2764_TDM_CFG2_SCFG_I2S 0x0
-#define TAS2764_TDM_CFG2_SCFG_LEFT_J BIT(4)
-#define TAS2764_TDM_CFG2_SCFG_RIGHT_J BIT(5)
+#define TAS2764_TDM_CFG2_SCFG_SHIFT 4
/* TDM Configuration Reg3 */
#define TAS2764_TDM_CFG3 TAS2764_REG(0X0, 0x0c)
diff --git a/sound/soc/codecs/tlv320adcx140.c b/sound/soc/codecs/tlv320adcx140.c
index b55f0b836932..0b729658fde8 100644
--- a/sound/soc/codecs/tlv320adcx140.c
+++ b/sound/soc/codecs/tlv320adcx140.c
@@ -33,7 +33,6 @@ struct adcx140_priv {
bool micbias_vg;
unsigned int dai_fmt;
- unsigned int tdm_delay;
unsigned int slot_width;
};
@@ -792,12 +791,13 @@ static int adcx140_set_dai_tdm_slot(struct snd_soc_dai *codec_dai,
{
struct snd_soc_component *component = codec_dai->component;
struct adcx140_priv *adcx140 = snd_soc_component_get_drvdata(component);
- unsigned int lsb;
- /* TDM based on DSP mode requires slots to be adjacent */
- lsb = __ffs(tx_mask);
- if ((lsb + 1) != __fls(tx_mask)) {
- dev_err(component->dev, "Invalid mask, slots must be adjacent\n");
+ /*
+ * The chip itself supports arbitrary masks, but the driver currently
+ * only supports adjacent slots beginning at the first slot.
+ */
+ if (tx_mask != GENMASK(__fls(tx_mask), 0)) {
+ dev_err(component->dev, "Only lower adjacent slots are supported\n");
return -EINVAL;
}
@@ -812,7 +812,6 @@ static int adcx140_set_dai_tdm_slot(struct snd_soc_dai *codec_dai,
return -EINVAL;
}
- adcx140->tdm_delay = lsb;
adcx140->slot_width = slot_width;
return 0;
diff --git a/sound/soc/codecs/wcd9335.c b/sound/soc/codecs/wcd9335.c
index d9f135200688..3cb7a3eab8c7 100644
--- a/sound/soc/codecs/wcd9335.c
+++ b/sound/soc/codecs/wcd9335.c
@@ -342,7 +342,7 @@ struct wcd9335_codec {
struct regulator_bulk_data supplies[WCD9335_MAX_SUPPLY];
unsigned int rx_port_value[WCD9335_RX_MAX];
- unsigned int tx_port_value;
+ unsigned int tx_port_value[WCD9335_TX_MAX];
int hph_l_gain;
int hph_r_gain;
u32 rx_bias_count;
@@ -1334,8 +1334,13 @@ static int slim_tx_mixer_get(struct snd_kcontrol *kc,
struct snd_soc_dapm_context *dapm = snd_soc_dapm_kcontrol_dapm(kc);
struct wcd9335_codec *wcd = dev_get_drvdata(dapm->dev);
+ struct snd_soc_dapm_widget *widget = snd_soc_dapm_kcontrol_widget(kc);
+ struct soc_mixer_control *mixer =
+ (struct soc_mixer_control *)kc->private_value;
+ int dai_id = widget->shift;
+ int port_id = mixer->shift;
- ucontrol->value.integer.value[0] = wcd->tx_port_value;
+ ucontrol->value.integer.value[0] = wcd->tx_port_value[port_id] == dai_id;
return 0;
}
@@ -1358,12 +1363,12 @@ static int slim_tx_mixer_put(struct snd_kcontrol *kc,
case AIF2_CAP:
case AIF3_CAP:
/* only add to the list if value not set */
- if (enable && !(wcd->tx_port_value & BIT(port_id))) {
- wcd->tx_port_value |= BIT(port_id);
+ if (enable && wcd->tx_port_value[port_id] != dai_id) {
+ wcd->tx_port_value[port_id] = dai_id;
list_add_tail(&wcd->tx_chs[port_id].list,
&wcd->dai[dai_id].slim_ch_list);
- } else if (!enable && (wcd->tx_port_value & BIT(port_id))) {
- wcd->tx_port_value &= ~BIT(port_id);
+ } else if (!enable && wcd->tx_port_value[port_id] == dai_id) {
+ wcd->tx_port_value[port_id] = -1;
list_del_init(&wcd->tx_chs[port_id].list);
}
break;
diff --git a/sound/soc/codecs/wm5102.c b/sound/soc/codecs/wm5102.c
index da2f8998df87..b034df47a5ef 100644
--- a/sound/soc/codecs/wm5102.c
+++ b/sound/soc/codecs/wm5102.c
@@ -680,12 +680,17 @@ static int wm5102_out_comp_coeff_put(struct snd_kcontrol *kcontrol,
{
struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol);
struct arizona *arizona = dev_get_drvdata(component->dev->parent);
+ uint16_t dac_comp_coeff = get_unaligned_be16(ucontrol->value.bytes.data);
+ int ret = 0;
mutex_lock(&arizona->dac_comp_lock);
- arizona->dac_comp_coeff = get_unaligned_be16(ucontrol->value.bytes.data);
+ if (arizona->dac_comp_coeff != dac_comp_coeff) {
+ arizona->dac_comp_coeff = dac_comp_coeff;
+ ret = 1;
+ }
mutex_unlock(&arizona->dac_comp_lock);
- return 0;
+ return ret;
}
static int wm5102_out_comp_switch_get(struct snd_kcontrol *kcontrol,
@@ -706,12 +711,20 @@ static int wm5102_out_comp_switch_put(struct snd_kcontrol *kcontrol,
{
struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol);
struct arizona *arizona = dev_get_drvdata(component->dev->parent);
+ struct soc_mixer_control *mc = (struct soc_mixer_control *)kcontrol->private_value;
+ int ret = 0;
+
+ if (ucontrol->value.integer.value[0] > mc->max)
+ return -EINVAL;
mutex_lock(&arizona->dac_comp_lock);
- arizona->dac_comp_enabled = ucontrol->value.integer.value[0];
+ if (arizona->dac_comp_enabled != ucontrol->value.integer.value[0]) {
+ arizona->dac_comp_enabled = ucontrol->value.integer.value[0];
+ ret = 1;
+ }
mutex_unlock(&arizona->dac_comp_lock);
- return 0;
+ return ret;
}
static const char * const wm5102_osr_text[] = {
diff --git a/sound/soc/codecs/wm8998.c b/sound/soc/codecs/wm8998.c
index 00b59fc9b1fe..ab5481187c71 100644
--- a/sound/soc/codecs/wm8998.c
+++ b/sound/soc/codecs/wm8998.c
@@ -108,6 +108,7 @@ static int wm8998_inmux_put(struct snd_kcontrol *kcontrol,
struct soc_enum *e = (struct soc_enum *)kcontrol->private_value;
unsigned int mode_reg, mode_index;
unsigned int mux, inmode, src_val, mode_val;
+ int change, ret;
mux = ucontrol->value.enumerated.item[0];
if (mux > 1)
@@ -137,14 +138,20 @@ static int wm8998_inmux_put(struct snd_kcontrol *kcontrol,
snd_soc_component_update_bits(component, mode_reg,
ARIZONA_IN1_MODE_MASK, mode_val);
- snd_soc_component_update_bits(component, e->reg,
- ARIZONA_IN1L_SRC_MASK |
- ARIZONA_IN1L_SRC_SE_MASK,
- src_val);
+ change = snd_soc_component_update_bits(component, e->reg,
+ ARIZONA_IN1L_SRC_MASK |
+ ARIZONA_IN1L_SRC_SE_MASK,
+ src_val);
- return snd_soc_dapm_mux_update_power(dapm, kcontrol,
- ucontrol->value.enumerated.item[0],
- e, NULL);
+ ret = snd_soc_dapm_mux_update_power(dapm, kcontrol,
+ ucontrol->value.enumerated.item[0],
+ e, NULL);
+ if (ret < 0) {
+ dev_err(arizona->dev, "Failed to update demux power state: %d\n", ret);
+ return ret;
+ }
+
+ return change;
}
static const char * const wm8998_inmux_texts[] = {
diff --git a/sound/soc/generic/audio-graph-card2.c b/sound/soc/generic/audio-graph-card2.c
index 77ac4051b827..d34b29a49268 100644
--- a/sound/soc/generic/audio-graph-card2.c
+++ b/sound/soc/generic/audio-graph-card2.c
@@ -90,12 +90,12 @@ links indicates connection part of CPU side (= A).
ports@0 {
(X) (A) mcpu: port@0 { mcpu0_ep: endpoint { remote-endpoint = <&mcodec0_ep>; }; };
(y) port@1 { mcpu1_ep: endpoint { remote-endpoint = <&cpu1_ep>; }; };
-(y) port@1 { mcpu2_ep: endpoint { remote-endpoint = <&cpu2_ep>; }; };
+(y) port@2 { mcpu2_ep: endpoint { remote-endpoint = <&cpu2_ep>; }; };
};
ports@1 {
(X) port@0 { mcodec0_ep: endpoint { remote-endpoint = <&mcpu0_ep>; }; };
-(y) port@0 { mcodec1_ep: endpoint { remote-endpoint = <&codec1_ep>; }; };
-(y) port@1 { mcodec2_ep: endpoint { remote-endpoint = <&codec2_ep>; }; };
+(y) port@1 { mcodec1_ep: endpoint { remote-endpoint = <&codec1_ep>; }; };
+(y) port@2 { mcodec2_ep: endpoint { remote-endpoint = <&codec2_ep>; }; };
};
};
};
diff --git a/sound/soc/intel/boards/sof_rt5682.c b/sound/soc/intel/boards/sof_rt5682.c
index 5d67a2c87a1d..4a90a0a5d831 100644
--- a/sound/soc/intel/boards/sof_rt5682.c
+++ b/sound/soc/intel/boards/sof_rt5682.c
@@ -69,11 +69,10 @@ static unsigned long sof_rt5682_quirk = SOF_RT5682_MCLK_EN |
static int is_legacy_cpu;
-static struct snd_soc_jack sof_hdmi[3];
-
struct sof_hdmi_pcm {
struct list_head head;
struct snd_soc_dai *codec_dai;
+ struct snd_soc_jack hdmi_jack;
int device;
};
@@ -434,7 +433,6 @@ static int sof_card_late_probe(struct snd_soc_card *card)
char jack_name[NAME_SIZE];
struct sof_hdmi_pcm *pcm;
int err;
- int i = 0;
/* HDMI is not supported by SOF on Baytrail/CherryTrail */
if (is_legacy_cpu || !ctx->idisp_codec)
@@ -455,17 +453,15 @@ static int sof_card_late_probe(struct snd_soc_card *card)
snprintf(jack_name, sizeof(jack_name),
"HDMI/DP, pcm=%d Jack", pcm->device);
err = snd_soc_card_jack_new(card, jack_name,
- SND_JACK_AVOUT, &sof_hdmi[i]);
+ SND_JACK_AVOUT, &pcm->hdmi_jack);
if (err)
return err;
err = hdac_hdmi_jack_init(pcm->codec_dai, pcm->device,
- &sof_hdmi[i]);
+ &pcm->hdmi_jack);
if (err < 0)
return err;
-
- i++;
}
if (sof_rt5682_quirk & SOF_MAX98373_SPEAKER_AMP_PRESENT) {
diff --git a/sound/soc/intel/skylake/skl-nhlt.c b/sound/soc/intel/skylake/skl-nhlt.c
index 2439a574ac2f..deb7b820325e 100644
--- a/sound/soc/intel/skylake/skl-nhlt.c
+++ b/sound/soc/intel/skylake/skl-nhlt.c
@@ -99,7 +99,6 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks,
struct nhlt_fmt_cfg *fmt_cfg;
struct wav_fmt_ext *wav_fmt;
unsigned long rate;
- bool present = false;
int rate_index = 0;
u16 channels, bps;
u8 clk_src;
@@ -112,9 +111,12 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks,
if (fmt->fmt_count == 0)
return;
+ fmt_cfg = (struct nhlt_fmt_cfg *)fmt->fmt_config;
for (i = 0; i < fmt->fmt_count; i++) {
- fmt_cfg = &fmt->fmt_config[i];
- wav_fmt = &fmt_cfg->fmt_ext;
+ struct nhlt_fmt_cfg *saved_fmt_cfg = fmt_cfg;
+ bool present = false;
+
+ wav_fmt = &saved_fmt_cfg->fmt_ext;
channels = wav_fmt->fmt.channels;
bps = wav_fmt->fmt.bits_per_sample;
@@ -132,12 +134,18 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks,
* derive the rate.
*/
for (j = i; j < fmt->fmt_count; j++) {
- fmt_cfg = &fmt->fmt_config[j];
- wav_fmt = &fmt_cfg->fmt_ext;
+ struct nhlt_fmt_cfg *tmp_fmt_cfg = fmt_cfg;
+
+ wav_fmt = &tmp_fmt_cfg->fmt_ext;
if ((fs == wav_fmt->fmt.samples_per_sec) &&
- (bps == wav_fmt->fmt.bits_per_sample))
+ (bps == wav_fmt->fmt.bits_per_sample)) {
channels = max_t(u16, channels,
wav_fmt->fmt.channels);
+ saved_fmt_cfg = tmp_fmt_cfg;
+ }
+ /* Move to the next nhlt_fmt_cfg */
+ tmp_fmt_cfg = (struct nhlt_fmt_cfg *)(tmp_fmt_cfg->config.caps +
+ tmp_fmt_cfg->config.size);
}
rate = channels * bps * fs;
@@ -153,8 +161,11 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks,
/* Fill rate and parent for sclk/sclkfs */
if (!present) {
+ struct nhlt_fmt_cfg *first_fmt_cfg;
+
+ first_fmt_cfg = (struct nhlt_fmt_cfg *)fmt->fmt_config;
i2s_config_ext = (struct skl_i2s_config_blob_ext *)
- fmt->fmt_config[0].config.caps;
+ first_fmt_cfg->config.caps;
/* MCLK Divider Source Select */
if (is_legacy_blob(i2s_config_ext->hdr.sig)) {
@@ -168,6 +179,9 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks,
parent = skl_get_parent_clk(clk_src);
+ /* Move to the next nhlt_fmt_cfg */
+ fmt_cfg = (struct nhlt_fmt_cfg *)(fmt_cfg->config.caps +
+ fmt_cfg->config.size);
/*
* Do not copy the config data if there is no parent
* clock available for this clock source select
@@ -176,9 +190,9 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks,
continue;
sclk[id].rate_cfg[rate_index].rate = rate;
- sclk[id].rate_cfg[rate_index].config = fmt_cfg;
+ sclk[id].rate_cfg[rate_index].config = saved_fmt_cfg;
sclkfs[id].rate_cfg[rate_index].rate = rate;
- sclkfs[id].rate_cfg[rate_index].config = fmt_cfg;
+ sclkfs[id].rate_cfg[rate_index].config = saved_fmt_cfg;
sclk[id].parent_name = parent->name;
sclkfs[id].parent_name = parent->name;
@@ -192,13 +206,13 @@ static void skl_get_mclk(struct skl_dev *skl, struct skl_ssp_clk *mclk,
{
struct skl_i2s_config_blob_ext *i2s_config_ext;
struct skl_i2s_config_blob_legacy *i2s_config;
- struct nhlt_specific_cfg *fmt_cfg;
+ struct nhlt_fmt_cfg *fmt_cfg;
struct skl_clk_parent_src *parent;
u32 clkdiv, div_ratio;
u8 clk_src;
- fmt_cfg = &fmt->fmt_config[0].config;
- i2s_config_ext = (struct skl_i2s_config_blob_ext *)fmt_cfg->caps;
+ fmt_cfg = (struct nhlt_fmt_cfg *)fmt->fmt_config;
+ i2s_config_ext = (struct skl_i2s_config_blob_ext *)fmt_cfg->config.caps;
/* MCLK Divider Source Select and divider */
if (is_legacy_blob(i2s_config_ext->hdr.sig)) {
@@ -227,7 +241,7 @@ static void skl_get_mclk(struct skl_dev *skl, struct skl_ssp_clk *mclk,
return;
mclk[id].rate_cfg[0].rate = parent->rate/div_ratio;
- mclk[id].rate_cfg[0].config = &fmt->fmt_config[0];
+ mclk[id].rate_cfg[0].config = fmt_cfg;
mclk[id].parent_name = parent->name;
}
diff --git a/sound/soc/qcom/qdsp6/q6apm.c b/sound/soc/qcom/qdsp6/q6apm.c
index f424d7aa389a..794019286c70 100644
--- a/sound/soc/qcom/qdsp6/q6apm.c
+++ b/sound/soc/qcom/qdsp6/q6apm.c
@@ -75,6 +75,7 @@ static struct audioreach_graph *q6apm_get_audioreach_graph(struct q6apm *apm, ui
id = idr_alloc(&apm->graph_idr, graph, graph_id, graph_id + 1, GFP_KERNEL);
if (id < 0) {
dev_err(apm->dev, "Unable to allocate graph id (%d)\n", graph_id);
+ kfree(graph->graph);
kfree(graph);
mutex_unlock(&apm->lock);
return ERR_PTR(id);
diff --git a/sound/soc/ti/omap-mcbsp-priv.h b/sound/soc/ti/omap-mcbsp-priv.h
index 7865cda4bf0a..da519ea1f303 100644
--- a/sound/soc/ti/omap-mcbsp-priv.h
+++ b/sound/soc/ti/omap-mcbsp-priv.h
@@ -316,8 +316,6 @@ static inline int omap_mcbsp_read(struct omap_mcbsp *mcbsp, u16 reg,
/* Sidetone specific API */
int omap_mcbsp_st_init(struct platform_device *pdev);
-void omap_mcbsp_st_cleanup(struct platform_device *pdev);
-
int omap_mcbsp_st_start(struct omap_mcbsp *mcbsp);
int omap_mcbsp_st_stop(struct omap_mcbsp *mcbsp);
diff --git a/sound/soc/ti/omap-mcbsp-st.c b/sound/soc/ti/omap-mcbsp-st.c
index 0bc7d26c660a..7e8179cae92e 100644
--- a/sound/soc/ti/omap-mcbsp-st.c
+++ b/sound/soc/ti/omap-mcbsp-st.c
@@ -347,7 +347,7 @@ int omap_mcbsp_st_init(struct platform_device *pdev)
if (!st_data)
return -ENOMEM;
- st_data->mcbsp_iclk = clk_get(mcbsp->dev, "ick");
+ st_data->mcbsp_iclk = devm_clk_get(mcbsp->dev, "ick");
if (IS_ERR(st_data->mcbsp_iclk)) {
dev_warn(mcbsp->dev,
"Failed to get ick, sidetone might be broken\n");
@@ -359,7 +359,7 @@ int omap_mcbsp_st_init(struct platform_device *pdev)
if (!st_data->io_base_st)
return -ENOMEM;
- ret = sysfs_create_group(&mcbsp->dev->kobj, &sidetone_attr_group);
+ ret = devm_device_add_group(mcbsp->dev, &sidetone_attr_group);
if (ret)
return ret;
@@ -368,16 +368,6 @@ int omap_mcbsp_st_init(struct platform_device *pdev)
return 0;
}
-void omap_mcbsp_st_cleanup(struct platform_device *pdev)
-{
- struct omap_mcbsp *mcbsp = platform_get_drvdata(pdev);
-
- if (mcbsp->st_data) {
- sysfs_remove_group(&mcbsp->dev->kobj, &sidetone_attr_group);
- clk_put(mcbsp->st_data->mcbsp_iclk);
- }
-}
-
static int omap_mcbsp_st_info_volsw(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_info *uinfo)
{
diff --git a/sound/soc/ti/omap-mcbsp.c b/sound/soc/ti/omap-mcbsp.c
index 4479d74f0a45..9933b33c80ca 100644
--- a/sound/soc/ti/omap-mcbsp.c
+++ b/sound/soc/ti/omap-mcbsp.c
@@ -702,8 +702,7 @@ static int omap_mcbsp_init(struct platform_device *pdev)
mcbsp->max_tx_thres = max_thres(mcbsp) - 0x10;
mcbsp->max_rx_thres = max_thres(mcbsp) - 0x10;
- ret = sysfs_create_group(&mcbsp->dev->kobj,
- &additional_attr_group);
+ ret = devm_device_add_group(mcbsp->dev, &additional_attr_group);
if (ret) {
dev_err(mcbsp->dev,
"Unable to create additional controls\n");
@@ -711,16 +710,7 @@ static int omap_mcbsp_init(struct platform_device *pdev)
}
}
- ret = omap_mcbsp_st_init(pdev);
- if (ret)
- goto err_st;
-
- return 0;
-
-err_st:
- if (mcbsp->pdata->buffer_size)
- sysfs_remove_group(&mcbsp->dev->kobj, &additional_attr_group);
- return ret;
+ return omap_mcbsp_st_init(pdev);
}
/*
@@ -1431,11 +1421,6 @@ static int asoc_mcbsp_remove(struct platform_device *pdev)
if (cpu_latency_qos_request_active(&mcbsp->pm_qos_req))
cpu_latency_qos_remove_request(&mcbsp->pm_qos_req);
- if (mcbsp->pdata->buffer_size)
- sysfs_remove_group(&mcbsp->dev->kobj, &additional_attr_group);
-
- omap_mcbsp_st_cleanup(pdev);
-
return 0;
}
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index d27e0581b777..2eab6a3a8a8c 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -51,6 +51,8 @@
#define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */
#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
+#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */
+#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
@@ -140,6 +142,13 @@
* bit available to control VERW
* behavior.
*/
+#define ARCH_CAP_RRSBA BIT(19) /*
+ * Indicates RET may use predictors
+ * other than the RSB. With eIBRS
+ * enabled predictions in kernel mode
+ * are restricted to targets in
+ * kernel.
+ */
#define MSR_IA32_FLUSH_CMD 0x0000010b
#define L1D_FLUSH BIT(0) /*
diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h
index 15b940ec1eac..10bc88cc3bf6 100644
--- a/tools/include/linux/objtool.h
+++ b/tools/include/linux/objtool.h
@@ -32,11 +32,16 @@ struct unwind_hint {
*
* UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function.
* Useful for code which doesn't have an ELF function annotation.
+ *
+ * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc.
*/
#define UNWIND_HINT_TYPE_CALL 0
#define UNWIND_HINT_TYPE_REGS 1
#define UNWIND_HINT_TYPE_REGS_PARTIAL 2
#define UNWIND_HINT_TYPE_FUNC 3
+#define UNWIND_HINT_TYPE_ENTRY 4
+#define UNWIND_HINT_TYPE_SAVE 5
+#define UNWIND_HINT_TYPE_RESTORE 6
#ifdef CONFIG_OBJTOOL
@@ -124,7 +129,7 @@ struct unwind_hint {
* the debuginfo as necessary. It will also warn if it sees any
* inconsistencies.
*/
-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
+.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
.Lunwind_hint_ip_\@:
.pushsection .discard.unwind_hints
/* struct unwind_hint */
@@ -177,7 +182,7 @@ struct unwind_hint {
#define ASM_REACHABLE
#else
#define ANNOTATE_INTRA_FUNCTION_CALL
-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
+.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
.endm
.macro STACK_FRAME_NON_STANDARD func:req
.endm
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 8b990a52aada..c260006106be 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -787,3 +787,8 @@ bool arch_is_retpoline(struct symbol *sym)
{
return !strncmp(sym->name, "__x86_indirect_", 15);
}
+
+bool arch_is_rethunk(struct symbol *sym)
+{
+ return !strcmp(sym->name, "__x86_return_thunk");
+}
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index f4c3a5091737..24fbe803a0d3 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -68,6 +68,8 @@ const struct option check_options[] = {
OPT_BOOLEAN('n', "noinstr", &opts.noinstr, "validate noinstr rules"),
OPT_BOOLEAN('o', "orc", &opts.orc, "generate ORC metadata"),
OPT_BOOLEAN('r', "retpoline", &opts.retpoline, "validate and annotate retpoline usage"),
+ OPT_BOOLEAN(0, "rethunk", &opts.rethunk, "validate and annotate rethunk usage"),
+ OPT_BOOLEAN(0, "unret", &opts.unret, "validate entry unret placement"),
OPT_BOOLEAN('l', "sls", &opts.sls, "validate straight-line-speculation mitigations"),
OPT_BOOLEAN('s', "stackval", &opts.stackval, "validate frame pointer rules"),
OPT_BOOLEAN('t', "static-call", &opts.static_call, "annotate static calls"),
@@ -123,6 +125,7 @@ static bool opts_valid(void)
opts.noinstr ||
opts.orc ||
opts.retpoline ||
+ opts.rethunk ||
opts.sls ||
opts.stackval ||
opts.static_call ||
@@ -135,6 +138,11 @@ static bool opts_valid(void)
return true;
}
+ if (opts.unret && !opts.rethunk) {
+ ERROR("--unret requires --rethunk");
+ return false;
+ }
+
if (opts.dump_orc)
return true;
@@ -163,6 +171,11 @@ static bool link_opts_valid(struct objtool_file *file)
return false;
}
+ if (opts.unret) {
+ ERROR("--unret requires --link");
+ return false;
+ }
+
return true;
}
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 57153e00349c..b341f8a8c7c5 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -376,7 +376,8 @@ static int decode_instructions(struct objtool_file *file)
sec->text = true;
if (!strcmp(sec->name, ".noinstr.text") ||
- !strcmp(sec->name, ".entry.text"))
+ !strcmp(sec->name, ".entry.text") ||
+ !strncmp(sec->name, ".text.__x86.", 12))
sec->noinstr = true;
for (offset = 0; offset < sec->sh.sh_size; offset += insn->len) {
@@ -749,6 +750,52 @@ static int create_retpoline_sites_sections(struct objtool_file *file)
return 0;
}
+static int create_return_sites_sections(struct objtool_file *file)
+{
+ struct instruction *insn;
+ struct section *sec;
+ int idx;
+
+ sec = find_section_by_name(file->elf, ".return_sites");
+ if (sec) {
+ WARN("file already has .return_sites, skipping");
+ return 0;
+ }
+
+ idx = 0;
+ list_for_each_entry(insn, &file->return_thunk_list, call_node)
+ idx++;
+
+ if (!idx)
+ return 0;
+
+ sec = elf_create_section(file->elf, ".return_sites", 0,
+ sizeof(int), idx);
+ if (!sec) {
+ WARN("elf_create_section: .return_sites");
+ return -1;
+ }
+
+ idx = 0;
+ list_for_each_entry(insn, &file->return_thunk_list, call_node) {
+
+ int *site = (int *)sec->data->d_buf + idx;
+ *site = 0;
+
+ if (elf_add_reloc_to_insn(file->elf, sec,
+ idx * sizeof(int),
+ R_X86_64_PC32,
+ insn->sec, insn->offset)) {
+ WARN("elf_add_reloc_to_insn: .return_sites");
+ return -1;
+ }
+
+ idx++;
+ }
+
+ return 0;
+}
+
static int create_ibt_endbr_seal_sections(struct objtool_file *file)
{
struct instruction *insn;
@@ -1083,6 +1130,11 @@ __weak bool arch_is_retpoline(struct symbol *sym)
return false;
}
+__weak bool arch_is_rethunk(struct symbol *sym)
+{
+ return false;
+}
+
#define NEGATIVE_RELOC ((void *)-1L)
static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn)
@@ -1250,6 +1302,19 @@ static void add_retpoline_call(struct objtool_file *file, struct instruction *in
annotate_call_site(file, insn, false);
}
+static void add_return_call(struct objtool_file *file, struct instruction *insn, bool add)
+{
+ /*
+ * Return thunk tail calls are really just returns in disguise,
+ * so convert them accordingly.
+ */
+ insn->type = INSN_RETURN;
+ insn->retpoline_safe = true;
+
+ if (add)
+ list_add_tail(&insn->call_node, &file->return_thunk_list);
+}
+
static bool same_function(struct instruction *insn1, struct instruction *insn2)
{
return insn1->func->pfunc == insn2->func->pfunc;
@@ -1302,6 +1367,9 @@ static int add_jump_destinations(struct objtool_file *file)
} else if (reloc->sym->retpoline_thunk) {
add_retpoline_call(file, insn);
continue;
+ } else if (reloc->sym->return_thunk) {
+ add_return_call(file, insn, true);
+ continue;
} else if (insn->func) {
/*
* External sibling call or internal sibling call with
@@ -1320,6 +1388,21 @@ static int add_jump_destinations(struct objtool_file *file)
jump_dest = find_insn(file, dest_sec, dest_off);
if (!jump_dest) {
+ struct symbol *sym = find_symbol_by_offset(dest_sec, dest_off);
+
+ /*
+ * This is a special case for zen_untrain_ret().
+ * It jumps to __x86_return_thunk(), but objtool
+ * can't find the thunk's starting RET
+ * instruction, because the RET is also in the
+ * middle of another instruction. Objtool only
+ * knows about the outer instruction.
+ */
+ if (sym && sym->return_thunk) {
+ add_return_call(file, insn, false);
+ continue;
+ }
+
WARN_FUNC("can't find jump dest instruction at %s+0x%lx",
insn->sec, insn->offset, dest_sec->name,
dest_off);
@@ -1949,16 +2032,35 @@ static int read_unwind_hints(struct objtool_file *file)
insn->hint = true;
- if (opts.ibt && hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) {
+ if (hint->type == UNWIND_HINT_TYPE_SAVE) {
+ insn->hint = false;
+ insn->save = true;
+ continue;
+ }
+
+ if (hint->type == UNWIND_HINT_TYPE_RESTORE) {
+ insn->restore = true;
+ continue;
+ }
+
+ if (hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) {
struct symbol *sym = find_symbol_by_offset(insn->sec, insn->offset);
- if (sym && sym->bind == STB_GLOBAL &&
- insn->type != INSN_ENDBR && !insn->noendbr) {
- WARN_FUNC("UNWIND_HINT_IRET_REGS without ENDBR",
- insn->sec, insn->offset);
+ if (sym && sym->bind == STB_GLOBAL) {
+ if (opts.ibt && insn->type != INSN_ENDBR && !insn->noendbr) {
+ WARN_FUNC("UNWIND_HINT_IRET_REGS without ENDBR",
+ insn->sec, insn->offset);
+ }
+
+ insn->entry = 1;
}
}
+ if (hint->type == UNWIND_HINT_TYPE_ENTRY) {
+ hint->type = UNWIND_HINT_TYPE_CALL;
+ insn->entry = 1;
+ }
+
if (hint->type == UNWIND_HINT_TYPE_FUNC) {
insn->cfi = &func_cfi;
continue;
@@ -2032,8 +2134,10 @@ static int read_retpoline_hints(struct objtool_file *file)
}
if (insn->type != INSN_JUMP_DYNAMIC &&
- insn->type != INSN_CALL_DYNAMIC) {
- WARN_FUNC("retpoline_safe hint not an indirect jump/call",
+ insn->type != INSN_CALL_DYNAMIC &&
+ insn->type != INSN_RETURN &&
+ insn->type != INSN_NOP) {
+ WARN_FUNC("retpoline_safe hint not an indirect jump/call/ret/nop",
insn->sec, insn->offset);
return -1;
}
@@ -2184,6 +2288,9 @@ static int classify_symbols(struct objtool_file *file)
if (arch_is_retpoline(func))
func->retpoline_thunk = true;
+ if (arch_is_rethunk(func))
+ func->return_thunk = true;
+
if (!strcmp(func->name, "__fentry__"))
func->fentry = true;
@@ -3218,8 +3325,8 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
return 1;
}
- visited = 1 << state.uaccess;
- if (insn->visited) {
+ visited = VISITED_BRANCH << state.uaccess;
+ if (insn->visited & VISITED_BRANCH_MASK) {
if (!insn->hint && !insn_cfi_match(insn, &state.cfi))
return 1;
@@ -3233,6 +3340,35 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
state.instr += insn->instr;
if (insn->hint) {
+ if (insn->restore) {
+ struct instruction *save_insn, *i;
+
+ i = insn;
+ save_insn = NULL;
+
+ sym_for_each_insn_continue_reverse(file, func, i) {
+ if (i->save) {
+ save_insn = i;
+ break;
+ }
+ }
+
+ if (!save_insn) {
+ WARN_FUNC("no corresponding CFI save for CFI restore",
+ sec, insn->offset);
+ return 1;
+ }
+
+ if (!save_insn->visited) {
+ WARN_FUNC("objtool isn't smart enough to handle this CFI save/restore combo",
+ sec, insn->offset);
+ return 1;
+ }
+
+ insn->cfi = save_insn->cfi;
+ nr_cfi_reused++;
+ }
+
state.cfi = *insn->cfi;
} else {
/* XXX track if we actually changed state.cfi */
@@ -3433,6 +3569,145 @@ static int validate_unwind_hints(struct objtool_file *file, struct section *sec)
return warnings;
}
+/*
+ * Validate rethunk entry constraint: must untrain RET before the first RET.
+ *
+ * Follow every branch (intra-function) and ensure ANNOTATE_UNRET_END comes
+ * before an actual RET instruction.
+ */
+static int validate_entry(struct objtool_file *file, struct instruction *insn)
+{
+ struct instruction *next, *dest;
+ int ret, warnings = 0;
+
+ for (;;) {
+ next = next_insn_to_validate(file, insn);
+
+ if (insn->visited & VISITED_ENTRY)
+ return 0;
+
+ insn->visited |= VISITED_ENTRY;
+
+ if (!insn->ignore_alts && !list_empty(&insn->alts)) {
+ struct alternative *alt;
+ bool skip_orig = false;
+
+ list_for_each_entry(alt, &insn->alts, list) {
+ if (alt->skip_orig)
+ skip_orig = true;
+
+ ret = validate_entry(file, alt->insn);
+ if (ret) {
+ if (opts.backtrace)
+ BT_FUNC("(alt)", insn);
+ return ret;
+ }
+ }
+
+ if (skip_orig)
+ return 0;
+ }
+
+ switch (insn->type) {
+
+ case INSN_CALL_DYNAMIC:
+ case INSN_JUMP_DYNAMIC:
+ case INSN_JUMP_DYNAMIC_CONDITIONAL:
+ WARN_FUNC("early indirect call", insn->sec, insn->offset);
+ return 1;
+
+ case INSN_JUMP_UNCONDITIONAL:
+ case INSN_JUMP_CONDITIONAL:
+ if (!is_sibling_call(insn)) {
+ if (!insn->jump_dest) {
+ WARN_FUNC("unresolved jump target after linking?!?",
+ insn->sec, insn->offset);
+ return -1;
+ }
+ ret = validate_entry(file, insn->jump_dest);
+ if (ret) {
+ if (opts.backtrace) {
+ BT_FUNC("(branch%s)", insn,
+ insn->type == INSN_JUMP_CONDITIONAL ? "-cond" : "");
+ }
+ return ret;
+ }
+
+ if (insn->type == INSN_JUMP_UNCONDITIONAL)
+ return 0;
+
+ break;
+ }
+
+ /* fallthrough */
+ case INSN_CALL:
+ dest = find_insn(file, insn->call_dest->sec,
+ insn->call_dest->offset);
+ if (!dest) {
+ WARN("Unresolved function after linking!?: %s",
+ insn->call_dest->name);
+ return -1;
+ }
+
+ ret = validate_entry(file, dest);
+ if (ret) {
+ if (opts.backtrace)
+ BT_FUNC("(call)", insn);
+ return ret;
+ }
+ /*
+ * If a call returns without error, it must have seen UNTRAIN_RET.
+ * Therefore any non-error return is a success.
+ */
+ return 0;
+
+ case INSN_RETURN:
+ WARN_FUNC("RET before UNTRAIN", insn->sec, insn->offset);
+ return 1;
+
+ case INSN_NOP:
+ if (insn->retpoline_safe)
+ return 0;
+ break;
+
+ default:
+ break;
+ }
+
+ if (!next) {
+ WARN_FUNC("teh end!", insn->sec, insn->offset);
+ return -1;
+ }
+ insn = next;
+ }
+
+ return warnings;
+}
+
+/*
+ * Validate that all branches starting at 'insn->entry' encounter UNRET_END
+ * before RET.
+ */
+static int validate_unret(struct objtool_file *file)
+{
+ struct instruction *insn;
+ int ret, warnings = 0;
+
+ for_each_insn(file, insn) {
+ if (!insn->entry)
+ continue;
+
+ ret = validate_entry(file, insn);
+ if (ret < 0) {
+ WARN_FUNC("Failed UNRET validation", insn->sec, insn->offset);
+ return ret;
+ }
+ warnings += ret;
+ }
+
+ return warnings;
+}
+
static int validate_retpoline(struct objtool_file *file)
{
struct instruction *insn;
@@ -3440,7 +3715,8 @@ static int validate_retpoline(struct objtool_file *file)
for_each_insn(file, insn) {
if (insn->type != INSN_JUMP_DYNAMIC &&
- insn->type != INSN_CALL_DYNAMIC)
+ insn->type != INSN_CALL_DYNAMIC &&
+ insn->type != INSN_RETURN)
continue;
if (insn->retpoline_safe)
@@ -3455,9 +3731,17 @@ static int validate_retpoline(struct objtool_file *file)
if (!strcmp(insn->sec->name, ".init.text") && !opts.module)
continue;
- WARN_FUNC("indirect %s found in RETPOLINE build",
- insn->sec, insn->offset,
- insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call");
+ if (insn->type == INSN_RETURN) {
+ if (opts.rethunk) {
+ WARN_FUNC("'naked' return found in RETHUNK build",
+ insn->sec, insn->offset);
+ } else
+ continue;
+ } else {
+ WARN_FUNC("indirect %s found in RETPOLINE build",
+ insn->sec, insn->offset,
+ insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call");
+ }
warnings++;
}
@@ -3945,6 +4229,17 @@ int check(struct objtool_file *file)
warnings += ret;
}
+ if (opts.unret) {
+ /*
+ * Must be after validate_branch() and friends, it plays
+ * further games with insn->visited.
+ */
+ ret = validate_unret(file);
+ if (ret < 0)
+ return ret;
+ warnings += ret;
+ }
+
if (opts.ibt) {
ret = validate_ibt(file);
if (ret < 0)
@@ -3973,6 +4268,13 @@ int check(struct objtool_file *file)
warnings += ret;
}
+ if (opts.rethunk) {
+ ret = create_return_sites_sections(file);
+ if (ret < 0)
+ goto out;
+ warnings += ret;
+ }
+
if (opts.mcount) {
ret = create_mcount_loc_sections(file);
if (ret < 0)
diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h
index 9b19cc304195..beb2f3aa94ff 100644
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -89,6 +89,7 @@ const char *arch_ret_insn(int len);
int arch_decode_hint_reg(u8 sp_reg, int *base);
bool arch_is_retpoline(struct symbol *sym);
+bool arch_is_rethunk(struct symbol *sym);
int arch_rewrite_retpolines(struct objtool_file *file);
diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h
index 280ea18b7f2b..42a52f1a0add 100644
--- a/tools/objtool/include/objtool/builtin.h
+++ b/tools/objtool/include/objtool/builtin.h
@@ -19,6 +19,8 @@ struct opts {
bool noinstr;
bool orc;
bool retpoline;
+ bool rethunk;
+ bool unret;
bool sls;
bool stackval;
bool static_call;
diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h
index f10d7374f388..036129cebeee 100644
--- a/tools/objtool/include/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -46,16 +46,19 @@ struct instruction {
enum insn_type type;
unsigned long immediate;
- u8 dead_end : 1,
- ignore : 1,
- ignore_alts : 1,
- hint : 1,
- retpoline_safe : 1,
- noendbr : 1;
- /* 2 bit hole */
+ u16 dead_end : 1,
+ ignore : 1,
+ ignore_alts : 1,
+ hint : 1,
+ save : 1,
+ restore : 1,
+ retpoline_safe : 1,
+ noendbr : 1,
+ entry : 1;
+ /* 7 bit hole */
+
s8 instr;
u8 visited;
- /* u8 hole */
struct alt_group *alt_group;
struct symbol *call_dest;
@@ -69,6 +72,11 @@ struct instruction {
struct cfi_state *cfi;
};
+#define VISITED_BRANCH 0x01
+#define VISITED_BRANCH_UACCESS 0x02
+#define VISITED_BRANCH_MASK 0x03
+#define VISITED_ENTRY 0x04
+
static inline bool is_static_jump(struct instruction *insn)
{
return insn->type == INSN_JUMP_CONDITIONAL ||
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index adebfbc2b518..16f4067b82ae 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -57,6 +57,7 @@ struct symbol {
u8 uaccess_safe : 1;
u8 static_call_tramp : 1;
u8 retpoline_thunk : 1;
+ u8 return_thunk : 1;
u8 fentry : 1;
u8 profiling_func : 1;
struct list_head pv_target;
diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h
index a6e72d916807..7f2d1b095333 100644
--- a/tools/objtool/include/objtool/objtool.h
+++ b/tools/objtool/include/objtool/objtool.h
@@ -24,6 +24,7 @@ struct objtool_file {
struct list_head insn_list;
DECLARE_HASHTABLE(insn_hash, 20);
struct list_head retpoline_call_list;
+ struct list_head return_thunk_list;
struct list_head static_call_list;
struct list_head mcount_loc_list;
struct list_head endbr_list;
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
index 512669ce064c..a7ecc32e3512 100644
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -102,6 +102,7 @@ struct objtool_file *objtool_open_read(const char *_objname)
INIT_LIST_HEAD(&file.insn_list);
hash_init(file.insn_hash);
INIT_LIST_HEAD(&file.retpoline_call_list);
+ INIT_LIST_HEAD(&file.return_thunk_list);
INIT_LIST_HEAD(&file.static_call_list);
INIT_LIST_HEAD(&file.mcount_loc_list);
INIT_LIST_HEAD(&file.endbr_list);