aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/Makefile53
-rw-r--r--arch/x86/boot/Makefile7
-rw-r--r--arch/x86/boot/compressed/efi_thunk_64.S30
-rw-r--r--arch/x86/boot/compressed/head_64.S3
-rw-r--r--arch/x86/boot/compressed/kaslr.c2
-rw-r--r--arch/x86/crypto/Makefile6
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c5
-rw-r--r--arch/x86/crypto/sm4-aesni-avx-asm_64.S589
-rw-r--r--arch/x86/crypto/sm4-aesni-avx2-asm_64.S497
-rw-r--r--arch/x86/crypto/sm4-avx.h24
-rw-r--r--arch/x86/crypto/sm4_aesni_avx2_glue.c169
-rw-r--r--arch/x86/crypto/sm4_aesni_avx_glue.c487
-rw-r--r--arch/x86/events/Kconfig10
-rw-r--r--arch/x86/events/amd/Makefile5
-rw-r--r--arch/x86/events/amd/ibs.c32
-rw-r--r--arch/x86/events/amd/power.c1
-rw-r--r--arch/x86/events/amd/uncore.c40
-rw-r--r--arch/x86/events/core.c16
-rw-r--r--arch/x86/events/intel/core.c31
-rw-r--r--arch/x86/events/intel/pt.c6
-rw-r--r--arch/x86/events/intel/uncore.c45
-rw-r--r--arch/x86/events/intel/uncore.h4
-rw-r--r--arch/x86/events/intel/uncore_discovery.c42
-rw-r--r--arch/x86/events/intel/uncore_discovery.h21
-rw-r--r--arch/x86/events/intel/uncore_snbep.c585
-rw-r--r--arch/x86/events/perf_event.h18
-rw-r--r--arch/x86/include/asm/amd-ibs.h132
-rw-r--r--arch/x86/include/asm/kfence.h4
-rw-r--r--arch/x86/include/asm/kvm_host.h7
-rw-r--r--arch/x86/include/asm/mce.h1
-rw-r--r--arch/x86/include/asm/processor.h2
-rw-r--r--arch/x86/include/asm/svm.h2
-rw-r--r--arch/x86/kernel/apic/io_apic.c6
-rw-r--r--arch/x86/kernel/apic/msi.c11
-rw-r--r--arch/x86/kernel/cpu/amd.c2
-rw-r--r--arch/x86/kernel/cpu/common.c6
-rw-r--r--arch/x86/kernel/cpu/mce/core.c11
-rw-r--r--arch/x86/kernel/cpu/mce/inject.c8
-rw-r--r--arch/x86/kernel/cpu/microcode/core.c18
-rw-r--r--arch/x86/kernel/cpu/mtrr/mtrr.c8
-rw-r--r--arch/x86/kernel/cpu/resctrl/core.c276
-rw-r--r--arch/x86/kernel/cpu/resctrl/ctrlmondata.c163
-rw-r--r--arch/x86/kernel/cpu/resctrl/internal.h231
-rw-r--r--arch/x86/kernel/cpu/resctrl/monitor.c71
-rw-r--r--arch/x86/kernel/cpu/resctrl/pseudo_lock.c12
-rw-r--r--arch/x86/kernel/cpu/resctrl/rdtgroup.c461
-rw-r--r--arch/x86/kernel/hpet.c2
-rw-r--r--arch/x86/kvm/cpuid.c28
-rw-r--r--arch/x86/kvm/hyperv.c20
-rw-r--r--arch/x86/kvm/ioapic.c2
-rw-r--r--arch/x86/kvm/ioapic.h4
-rw-r--r--arch/x86/kvm/mmu/mmu.c30
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c35
-rw-r--r--arch/x86/kvm/svm/avic.c2
-rw-r--r--arch/x86/kvm/svm/nested.c23
-rw-r--r--arch/x86/kvm/svm/sev.c45
-rw-r--r--arch/x86/kvm/svm/svm.c35
-rw-r--r--arch/x86/kvm/svm/svm.h6
-rw-r--r--arch/x86/kvm/svm/svm_onhyperv.h2
-rw-r--r--arch/x86/kvm/trace.h15
-rw-r--r--arch/x86/kvm/vmx/nested.c56
-rw-r--r--arch/x86/kvm/vmx/vmx.h2
-rw-r--r--arch/x86/kvm/x86.c17
-rw-r--r--arch/x86/mm/mmio-mod.c4
-rw-r--r--arch/x86/net/bpf_jit_comp.c7
-rw-r--r--arch/x86/net/bpf_jit_comp32.c6
-rw-r--r--arch/x86/power/cpu.c31
-rw-r--r--arch/x86/tools/chkobjdump.awk1
-rw-r--r--arch/x86/tools/relocs.c45
-rw-r--r--arch/x86/tools/relocs.h1
71 files changed, 3641 insertions, 939 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d8a2c3fe492a..421fa9e38c60 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -203,7 +203,6 @@ config X86
select HAVE_FUNCTION_TRACER
select HAVE_GCC_PLUGINS
select HAVE_HW_BREAKPOINT
- select HAVE_IDE
select HAVE_IOREMAP_PROT
select HAVE_IRQ_EXIT_ON_IRQ_STACK if X86_64
select HAVE_IRQ_TIME_ACCOUNTING
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 307fd0000a83..d82d01490dd3 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -31,8 +31,8 @@ REALMODE_CFLAGS := -m16 -g -Os -DDISABLE_BRANCH_PROFILING \
REALMODE_CFLAGS += -ffreestanding
REALMODE_CFLAGS += -fno-stack-protector
-REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -Wno-address-of-packed-member)
-REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), $(cc_stack_align4))
+REALMODE_CFLAGS += -Wno-address-of-packed-member
+REALMODE_CFLAGS += $(cc_stack_align4)
REALMODE_CFLAGS += $(CLANG_FLAGS)
export REALMODE_CFLAGS
@@ -48,8 +48,7 @@ export BITS
#
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383
#
-KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow
-KBUILD_CFLAGS += $(call cc-option,-mno-avx,)
+KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx
# Intel CET isn't enabled in the kernel
KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none)
@@ -59,9 +58,8 @@ ifeq ($(CONFIG_X86_32),y)
UTS_MACHINE := i386
CHECKFLAGS += -D__i386__
- biarch := $(call cc-option,-m32)
- KBUILD_AFLAGS += $(biarch)
- KBUILD_CFLAGS += $(biarch)
+ KBUILD_AFLAGS += -m32
+ KBUILD_CFLAGS += -m32
KBUILD_CFLAGS += -msoft-float -mregparm=3 -freg-struct-return
@@ -72,7 +70,7 @@ ifeq ($(CONFIG_X86_32),y)
# Align the stack to the register width instead of using the default
# alignment of 16 bytes. This reduces stack usage and the number of
# alignment instructions.
- KBUILD_CFLAGS += $(call cc-option,$(cc_stack_align4))
+ KBUILD_CFLAGS += $(cc_stack_align4)
# CPU-specific tuning. Anything which can be shared with UML should go here.
include arch/x86/Makefile_32.cpu
@@ -93,7 +91,6 @@ else
UTS_MACHINE := x86_64
CHECKFLAGS += -D__x86_64__
- biarch := -m64
KBUILD_AFLAGS += -m64
KBUILD_CFLAGS += -m64
@@ -104,7 +101,7 @@ else
KBUILD_CFLAGS += $(call cc-option,-falign-loops=1)
# Don't autogenerate traditional x87 instructions
- KBUILD_CFLAGS += $(call cc-option,-mno-80387)
+ KBUILD_CFLAGS += -mno-80387
KBUILD_CFLAGS += $(call cc-option,-mno-fp-ret-in-387)
# By default gcc and clang use a stack alignment of 16 bytes for x86.
@@ -114,20 +111,17 @@ else
# default alignment which keep the stack *mis*aligned.
# Furthermore an alignment to the register width reduces stack usage
# and the number of alignment instructions.
- KBUILD_CFLAGS += $(call cc-option,$(cc_stack_align8))
+ KBUILD_CFLAGS += $(cc_stack_align8)
# Use -mskip-rax-setup if supported.
KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup)
# FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu)
- cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
- cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
-
- cflags-$(CONFIG_MCORE2) += \
- $(call cc-option,-march=core2,$(call cc-option,-mtune=generic))
- cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \
- $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
- cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
+ cflags-$(CONFIG_MK8) += -march=k8
+ cflags-$(CONFIG_MPSC) += -march=nocona
+ cflags-$(CONFIG_MCORE2) += -march=core2
+ cflags-$(CONFIG_MATOM) += -march=atom
+ cflags-$(CONFIG_GENERIC_CPU) += -mtune=generic
KBUILD_CFLAGS += $(cflags-y)
KBUILD_CFLAGS += -mno-red-zone
@@ -158,18 +152,6 @@ export CONFIG_X86_X32_ABI
ifdef CONFIG_FUNCTION_GRAPH_TRACER
ifndef CONFIG_HAVE_FENTRY
ACCUMULATE_OUTGOING_ARGS := 1
- else
- ifeq ($(call cc-option-yn, -mfentry), n)
- ACCUMULATE_OUTGOING_ARGS := 1
-
- # GCC ignores '-maccumulate-outgoing-args' when used with '-Os'.
- # If '-Os' is enabled, disable it and print a warning.
- ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
- undefine CONFIG_CC_OPTIMIZE_FOR_SIZE
- $(warning Disabling CONFIG_CC_OPTIMIZE_FOR_SIZE. Your compiler does not have -mfentry so you cannot optimize for size with CONFIG_FUNCTION_GRAPH_TRACER.)
- endif
-
- endif
endif
endif
@@ -193,7 +175,7 @@ ifdef CONFIG_RETPOLINE
# only been fixed starting from gcc stable version 8.4.0 and
# onwards, but not for older ones. See gcc bug #86952.
ifndef CONFIG_CC_IS_CLANG
- KBUILD_CFLAGS += $(call cc-option,-fno-jump-tables)
+ KBUILD_CFLAGS += -fno-jump-tables
endif
endif
@@ -275,9 +257,10 @@ endif
$(BOOT_TARGETS): vmlinux
$(Q)$(MAKE) $(build)=$(boot) $@
-PHONY += install bzlilo
-install bzlilo:
- $(Q)$(MAKE) $(build)=$(boot) $@
+PHONY += install
+install:
+ $(CONFIG_SHELL) $(srctree)/$(boot)/install.sh $(KERNELRELEASE) \
+ $(KBUILD_IMAGE) System.map "$(INSTALL_PATH)"
PHONY += vdso_install
vdso_install:
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index dfbc26a8e924..b5aecb524a8a 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -133,7 +133,7 @@ quiet_cmd_genimage = GENIMAGE $3
cmd_genimage = $(BASH) $(srctree)/$(src)/genimage.sh $2 $3 $(obj)/bzImage \
$(obj)/mtools.conf '$(FDARGS)' $(FDINITRD)
-PHONY += bzdisk fdimage fdimage144 fdimage288 hdimage isoimage install
+PHONY += bzdisk fdimage fdimage144 fdimage288 hdimage isoimage
# This requires write access to /dev/fd0
# All images require syslinux to be installed; hdimage also requires
@@ -156,8 +156,3 @@ hdimage: $(imgdeps)
isoimage: $(imgdeps)
$(call cmd,genimage,isoimage,$(obj)/image.iso)
@$(kecho) 'Kernel: $(obj)/image.iso is ready'
-
-install:
- $(CONFIG_SHELL) $(srctree)/$(src)/install.sh \
- $(KERNELRELEASE) $(obj)/bzImage \
- System.map "$(INSTALL_PATH)"
diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S
index 95a223b3e56a..8bb92e9f4e97 100644
--- a/arch/x86/boot/compressed/efi_thunk_64.S
+++ b/arch/x86/boot/compressed/efi_thunk_64.S
@@ -5,9 +5,8 @@
* Early support for invoking 32-bit EFI services from a 64-bit kernel.
*
* Because this thunking occurs before ExitBootServices() we have to
- * restore the firmware's 32-bit GDT before we make EFI service calls,
- * since the firmware's 32-bit IDT is still currently installed and it
- * needs to be able to service interrupts.
+ * restore the firmware's 32-bit GDT and IDT before we make EFI service
+ * calls.
*
* On the plus side, we don't have to worry about mangling 64-bit
* addresses into 32-bits because we're executing with an identity
@@ -39,7 +38,7 @@ SYM_FUNC_START(__efi64_thunk)
/*
* Convert x86-64 ABI params to i386 ABI
*/
- subq $32, %rsp
+ subq $64, %rsp
movl %esi, 0x0(%rsp)
movl %edx, 0x4(%rsp)
movl %ecx, 0x8(%rsp)
@@ -49,14 +48,19 @@ SYM_FUNC_START(__efi64_thunk)
leaq 0x14(%rsp), %rbx
sgdt (%rbx)
+ addq $16, %rbx
+ sidt (%rbx)
+
/*
- * Switch to gdt with 32-bit segments. This is the firmware GDT
- * that was installed when the kernel started executing. This
- * pointer was saved at the EFI stub entry point in head_64.S.
+ * Switch to IDT and GDT with 32-bit segments. This is the firmware GDT
+ * and IDT that was installed when the kernel started executing. The
+ * pointers were saved at the EFI stub entry point in head_64.S.
*
* Pass the saved DS selector to the 32-bit code, and use far return to
* restore the saved CS selector.
*/
+ leaq efi32_boot_idt(%rip), %rax
+ lidt (%rax)
leaq efi32_boot_gdt(%rip), %rax
lgdt (%rax)
@@ -67,7 +71,7 @@ SYM_FUNC_START(__efi64_thunk)
pushq %rax
lretq
-1: addq $32, %rsp
+1: addq $64, %rsp
movq %rdi, %rax
pop %rbx
@@ -128,10 +132,13 @@ SYM_FUNC_START_LOCAL(efi_enter32)
/*
* Some firmware will return with interrupts enabled. Be sure to
- * disable them before we switch GDTs.
+ * disable them before we switch GDTs and IDTs.
*/
cli
+ lidtl (%ebx)
+ subl $16, %ebx
+
lgdtl (%ebx)
movl %cr4, %eax
@@ -166,6 +173,11 @@ SYM_DATA_START(efi32_boot_gdt)
.quad 0
SYM_DATA_END(efi32_boot_gdt)
+SYM_DATA_START(efi32_boot_idt)
+ .word 0
+ .quad 0
+SYM_DATA_END(efi32_boot_idt)
+
SYM_DATA_START(efi32_boot_cs)
.word 0
SYM_DATA_END(efi32_boot_cs)
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index a2347ded77ea..572c535cf45b 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -319,6 +319,9 @@ SYM_INNER_LABEL(efi32_pe_stub_entry, SYM_L_LOCAL)
movw %cs, rva(efi32_boot_cs)(%ebp)
movw %ds, rva(efi32_boot_ds)(%ebp)
+ /* Store firmware IDT descriptor */
+ sidtl rva(efi32_boot_idt)(%ebp)
+
/* Disable paging */
movl %cr0, %eax
btrl $X86_CR0_PG_BIT, %eax
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index e36690778497..67c3208b668a 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -668,7 +668,7 @@ static bool process_mem_region(struct mem_vector *region,
if (slot_area_index == MAX_SLOT_AREA) {
debug_putstr("Aborted e820/efi memmap scan when walking immovable regions(slot_areas full)!\n");
- return 1;
+ return true;
}
}
#endif
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index d0959e7b809f..f307c93fc90a 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -88,6 +88,12 @@ nhpoly1305-avx2-y := nh-avx2-x86_64.o nhpoly1305-avx2-glue.o
obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o
+obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX_X86_64) += sm4-aesni-avx-x86_64.o
+sm4-aesni-avx-x86_64-y := sm4-aesni-avx-asm_64.o sm4_aesni_avx_glue.o
+
+obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX2_X86_64) += sm4-aesni-avx2-x86_64.o
+sm4-aesni-avx2-x86_64-y := sm4-aesni-avx2-asm_64.o sm4_aesni_avx2_glue.o
+
quiet_cmd_perlasm = PERLASM $@
cmd_perlasm = $(PERL) $< > $@
$(obj)/%.S: $(src)/%.pl FORCE
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 2144e54a6c89..0fc961bef299 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -849,6 +849,8 @@ static int xts_crypt(struct skcipher_request *req, bool encrypt)
return -EINVAL;
err = skcipher_walk_virt(&walk, req, false);
+ if (!walk.nbytes)
+ return err;
if (unlikely(tail > 0 && walk.nbytes < walk.total)) {
int blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
@@ -862,7 +864,10 @@ static int xts_crypt(struct skcipher_request *req, bool encrypt)
skcipher_request_set_crypt(&subreq, req->src, req->dst,
blocks * AES_BLOCK_SIZE, req->iv);
req = &subreq;
+
err = skcipher_walk_virt(&walk, req, false);
+ if (err)
+ return err;
} else {
tail = 0;
}
diff --git a/arch/x86/crypto/sm4-aesni-avx-asm_64.S b/arch/x86/crypto/sm4-aesni-avx-asm_64.S
new file mode 100644
index 000000000000..fa2c3f50aecb
--- /dev/null
+++ b/arch/x86/crypto/sm4-aesni-avx-asm_64.S
@@ -0,0 +1,589 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SM4 Cipher Algorithm, AES-NI/AVX optimized.
+ * as specified in
+ * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
+ *
+ * Copyright (C) 2018 Markku-Juhani O. Saarinen <mjos@iki.fi>
+ * Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ * Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ */
+
+/* Based on SM4 AES-NI work by libgcrypt and Markku-Juhani O. Saarinen at:
+ * https://github.com/mjosaarinen/sm4ni
+ */
+
+#include <linux/linkage.h>
+#include <asm/frame.h>
+
+#define rRIP (%rip)
+
+#define RX0 %xmm0
+#define RX1 %xmm1
+#define MASK_4BIT %xmm2
+#define RTMP0 %xmm3
+#define RTMP1 %xmm4
+#define RTMP2 %xmm5
+#define RTMP3 %xmm6
+#define RTMP4 %xmm7
+
+#define RA0 %xmm8
+#define RA1 %xmm9
+#define RA2 %xmm10
+#define RA3 %xmm11
+
+#define RB0 %xmm12
+#define RB1 %xmm13
+#define RB2 %xmm14
+#define RB3 %xmm15
+
+#define RNOT %xmm0
+#define RBSWAP %xmm1
+
+
+/* Transpose four 32-bit words between 128-bit vectors. */
+#define transpose_4x4(x0, x1, x2, x3, t1, t2) \
+ vpunpckhdq x1, x0, t2; \
+ vpunpckldq x1, x0, x0; \
+ \
+ vpunpckldq x3, x2, t1; \
+ vpunpckhdq x3, x2, x2; \
+ \
+ vpunpckhqdq t1, x0, x1; \
+ vpunpcklqdq t1, x0, x0; \
+ \
+ vpunpckhqdq x2, t2, x3; \
+ vpunpcklqdq x2, t2, x2;
+
+/* pre-SubByte transform. */
+#define transform_pre(x, lo_t, hi_t, mask4bit, tmp0) \
+ vpand x, mask4bit, tmp0; \
+ vpandn x, mask4bit, x; \
+ vpsrld $4, x, x; \
+ \
+ vpshufb tmp0, lo_t, tmp0; \
+ vpshufb x, hi_t, x; \
+ vpxor tmp0, x, x;
+
+/* post-SubByte transform. Note: x has been XOR'ed with mask4bit by
+ * 'vaeslastenc' instruction.
+ */
+#define transform_post(x, lo_t, hi_t, mask4bit, tmp0) \
+ vpandn mask4bit, x, tmp0; \
+ vpsrld $4, x, x; \
+ vpand x, mask4bit, x; \
+ \
+ vpshufb tmp0, lo_t, tmp0; \
+ vpshufb x, hi_t, x; \
+ vpxor tmp0, x, x;
+
+
+.section .rodata.cst164, "aM", @progbits, 164
+.align 16
+
+/*
+ * Following four affine transform look-up tables are from work by
+ * Markku-Juhani O. Saarinen, at https://github.com/mjosaarinen/sm4ni
+ *
+ * These allow exposing SM4 S-Box from AES SubByte.
+ */
+
+/* pre-SubByte affine transform, from SM4 field to AES field. */
+.Lpre_tf_lo_s:
+ .quad 0x9197E2E474720701, 0xC7C1B4B222245157
+.Lpre_tf_hi_s:
+ .quad 0xE240AB09EB49A200, 0xF052B91BF95BB012
+
+/* post-SubByte affine transform, from AES field to SM4 field. */
+.Lpost_tf_lo_s:
+ .quad 0x5B67F2CEA19D0834, 0xEDD14478172BBE82
+.Lpost_tf_hi_s:
+ .quad 0xAE7201DD73AFDC00, 0x11CDBE62CC1063BF
+
+/* For isolating SubBytes from AESENCLAST, inverse shift row */
+.Linv_shift_row:
+ .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b
+ .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03
+
+/* Inverse shift row + Rotate left by 8 bits on 32-bit words with vpshufb */
+.Linv_shift_row_rol_8:
+ .byte 0x07, 0x00, 0x0d, 0x0a, 0x0b, 0x04, 0x01, 0x0e
+ .byte 0x0f, 0x08, 0x05, 0x02, 0x03, 0x0c, 0x09, 0x06
+
+/* Inverse shift row + Rotate left by 16 bits on 32-bit words with vpshufb */
+.Linv_shift_row_rol_16:
+ .byte 0x0a, 0x07, 0x00, 0x0d, 0x0e, 0x0b, 0x04, 0x01
+ .byte 0x02, 0x0f, 0x08, 0x05, 0x06, 0x03, 0x0c, 0x09
+
+/* Inverse shift row + Rotate left by 24 bits on 32-bit words with vpshufb */
+.Linv_shift_row_rol_24:
+ .byte 0x0d, 0x0a, 0x07, 0x00, 0x01, 0x0e, 0x0b, 0x04
+ .byte 0x05, 0x02, 0x0f, 0x08, 0x09, 0x06, 0x03, 0x0c
+
+/* For CTR-mode IV byteswap */
+.Lbswap128_mask:
+ .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+
+/* For input word byte-swap */
+.Lbswap32_mask:
+ .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
+
+.align 4
+/* 4-bit mask */
+.L0f0f0f0f:
+ .long 0x0f0f0f0f
+
+
+.text
+.align 16
+
+/*
+ * void sm4_aesni_avx_crypt4(const u32 *rk, u8 *dst,
+ * const u8 *src, int nblocks)
+ */
+.align 8
+SYM_FUNC_START(sm4_aesni_avx_crypt4)
+ /* input:
+ * %rdi: round key array, CTX
+ * %rsi: dst (1..4 blocks)
+ * %rdx: src (1..4 blocks)
+ * %rcx: num blocks (1..4)
+ */
+ FRAME_BEGIN
+
+ vmovdqu 0*16(%rdx), RA0;
+ vmovdqa RA0, RA1;
+ vmovdqa RA0, RA2;
+ vmovdqa RA0, RA3;
+ cmpq $2, %rcx;
+ jb .Lblk4_load_input_done;
+ vmovdqu 1*16(%rdx), RA1;
+ je .Lblk4_load_input_done;
+ vmovdqu 2*16(%rdx), RA2;
+ cmpq $3, %rcx;
+ je .Lblk4_load_input_done;
+ vmovdqu 3*16(%rdx), RA3;
+
+.Lblk4_load_input_done:
+
+ vmovdqa .Lbswap32_mask rRIP, RTMP2;
+ vpshufb RTMP2, RA0, RA0;
+ vpshufb RTMP2, RA1, RA1;
+ vpshufb RTMP2, RA2, RA2;
+ vpshufb RTMP2, RA3, RA3;
+
+ vbroadcastss .L0f0f0f0f rRIP, MASK_4BIT;
+ vmovdqa .Lpre_tf_lo_s rRIP, RTMP4;
+ vmovdqa .Lpre_tf_hi_s rRIP, RB0;
+ vmovdqa .Lpost_tf_lo_s rRIP, RB1;
+ vmovdqa .Lpost_tf_hi_s rRIP, RB2;
+ vmovdqa .Linv_shift_row rRIP, RB3;
+ vmovdqa .Linv_shift_row_rol_8 rRIP, RTMP2;
+ vmovdqa .Linv_shift_row_rol_16 rRIP, RTMP3;
+ transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
+
+#define ROUND(round, s0, s1, s2, s3) \
+ vbroadcastss (4*(round))(%rdi), RX0; \
+ vpxor s1, RX0, RX0; \
+ vpxor s2, RX0, RX0; \
+ vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \
+ \
+ /* sbox, non-linear part */ \
+ transform_pre(RX0, RTMP4, RB0, MASK_4BIT, RTMP0); \
+ vaesenclast MASK_4BIT, RX0, RX0; \
+ transform_post(RX0, RB1, RB2, MASK_4BIT, RTMP0); \
+ \
+ /* linear part */ \
+ vpshufb RB3, RX0, RTMP0; \
+ vpxor RTMP0, s0, s0; /* s0 ^ x */ \
+ vpshufb RTMP2, RX0, RTMP1; \
+ vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */ \
+ vpshufb RTMP3, RX0, RTMP1; \
+ vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \
+ vpshufb .Linv_shift_row_rol_24 rRIP, RX0, RTMP1; \
+ vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */ \
+ vpslld $2, RTMP0, RTMP1; \
+ vpsrld $30, RTMP0, RTMP0; \
+ vpxor RTMP0, s0, s0; \
+ /* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \
+ vpxor RTMP1, s0, s0;
+
+ leaq (32*4)(%rdi), %rax;
+.align 16
+.Lroundloop_blk4:
+ ROUND(0, RA0, RA1, RA2, RA3);
+ ROUND(1, RA1, RA2, RA3, RA0);
+ ROUND(2, RA2, RA3, RA0, RA1);
+ ROUND(3, RA3, RA0, RA1, RA2);
+ leaq (4*4)(%rdi), %rdi;
+ cmpq %rax, %rdi;
+ jne .Lroundloop_blk4;
+
+#undef ROUND
+
+ vmovdqa .Lbswap128_mask rRIP, RTMP2;
+
+ transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
+ vpshufb RTMP2, RA0, RA0;
+ vpshufb RTMP2, RA1, RA1;
+ vpshufb RTMP2, RA2, RA2;
+ vpshufb RTMP2, RA3, RA3;
+
+ vmovdqu RA0, 0*16(%rsi);
+ cmpq $2, %rcx;
+ jb .Lblk4_store_output_done;
+ vmovdqu RA1, 1*16(%rsi);
+ je .Lblk4_store_output_done;
+ vmovdqu RA2, 2*16(%rsi);
+ cmpq $3, %rcx;
+ je .Lblk4_store_output_done;
+ vmovdqu RA3, 3*16(%rsi);
+
+.Lblk4_store_output_done:
+ vzeroall;
+ FRAME_END
+ ret;
+SYM_FUNC_END(sm4_aesni_avx_crypt4)
+
+.align 8
+SYM_FUNC_START_LOCAL(__sm4_crypt_blk8)
+ /* input:
+ * %rdi: round key array, CTX
+ * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel
+ * plaintext blocks
+ * output:
+ * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel
+ * ciphertext blocks
+ */
+ FRAME_BEGIN
+
+ vmovdqa .Lbswap32_mask rRIP, RTMP2;
+ vpshufb RTMP2, RA0, RA0;
+ vpshufb RTMP2, RA1, RA1;
+ vpshufb RTMP2, RA2, RA2;
+ vpshufb RTMP2, RA3, RA3;
+ vpshufb RTMP2, RB0, RB0;
+ vpshufb RTMP2, RB1, RB1;
+ vpshufb RTMP2, RB2, RB2;
+ vpshufb RTMP2, RB3, RB3;
+
+ vbroadcastss .L0f0f0f0f rRIP, MASK_4BIT;
+ transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
+ transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1);
+
+#define ROUND(round, s0, s1, s2, s3, r0, r1, r2, r3) \
+ vbroadcastss (4*(round))(%rdi), RX0; \
+ vmovdqa .Lpre_tf_lo_s rRIP, RTMP4; \
+ vmovdqa .Lpre_tf_hi_s rRIP, RTMP1; \
+ vmovdqa RX0, RX1; \
+ vpxor s1, RX0, RX0; \
+ vpxor s2, RX0, RX0; \
+ vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \
+ vmovdqa .Lpost_tf_lo_s rRIP, RTMP2; \
+ vmovdqa .Lpost_tf_hi_s rRIP, RTMP3; \
+ vpxor r1, RX1, RX1; \
+ vpxor r2, RX1, RX1; \
+ vpxor r3, RX1, RX1; /* r1 ^ r2 ^ r3 ^ rk */ \
+ \
+ /* sbox, non-linear part */ \
+ transform_pre(RX0, RTMP4, RTMP1, MASK_4BIT, RTMP0); \
+ transform_pre(RX1, RTMP4, RTMP1, MASK_4BIT, RTMP0); \
+ vmovdqa .Linv_shift_row rRIP, RTMP4; \
+ vaesenclast MASK_4BIT, RX0, RX0; \
+ vaesenclast MASK_4BIT, RX1, RX1; \
+ transform_post(RX0, RTMP2, RTMP3, MASK_4BIT, RTMP0); \
+ transform_post(RX1, RTMP2, RTMP3, MASK_4BIT, RTMP0); \
+ \
+ /* linear part */ \
+ vpshufb RTMP4, RX0, RTMP0; \
+ vpxor RTMP0, s0, s0; /* s0 ^ x */ \
+ vpshufb RTMP4, RX1, RTMP2; \
+ vmovdqa .Linv_shift_row_rol_8 rRIP, RTMP4; \
+ vpxor RTMP2, r0, r0; /* r0 ^ x */ \
+ vpshufb RTMP4, RX0, RTMP1; \
+ vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */ \
+ vpshufb RTMP4, RX1, RTMP3; \
+ vmovdqa .Linv_shift_row_rol_16 rRIP, RTMP4; \
+ vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) */ \
+ vpshufb RTMP4, RX0, RTMP1; \
+ vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \
+ vpshufb RTMP4, RX1, RTMP3; \
+ vmovdqa .Linv_shift_row_rol_24 rRIP, RTMP4; \
+ vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) ^ rol(x,16) */ \
+ vpshufb RTMP4, RX0, RTMP1; \
+ vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */ \
+ /* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \
+ vpslld $2, RTMP0, RTMP1; \
+ vpsrld $30, RTMP0, RTMP0; \
+ vpxor RTMP0, s0, s0; \
+ vpxor RTMP1, s0, s0; \
+ vpshufb RTMP4, RX1, RTMP3; \
+ vpxor RTMP3, r0, r0; /* r0 ^ x ^ rol(x,24) */ \
+ /* r0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \
+ vpslld $2, RTMP2, RTMP3; \
+ vpsrld $30, RTMP2, RTMP2; \
+ vpxor RTMP2, r0, r0; \
+ vpxor RTMP3, r0, r0;
+
+ leaq (32*4)(%rdi), %rax;
+.align 16
+.Lroundloop_blk8:
+ ROUND(0, RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3);
+ ROUND(1, RA1, RA2, RA3, RA0, RB1, RB2, RB3, RB0);
+ ROUND(2, RA2, RA3, RA0, RA1, RB2, RB3, RB0, RB1);
+ ROUND(3, RA3, RA0, RA1, RA2, RB3, RB0, RB1, RB2);
+ leaq (4*4)(%rdi), %rdi;
+ cmpq %rax, %rdi;
+ jne .Lroundloop_blk8;
+
+#undef ROUND
+
+ vmovdqa .Lbswap128_mask rRIP, RTMP2;
+
+ transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
+ transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1);
+ vpshufb RTMP2, RA0, RA0;
+ vpshufb RTMP2, RA1, RA1;
+ vpshufb RTMP2, RA2, RA2;
+ vpshufb RTMP2, RA3, RA3;
+ vpshufb RTMP2, RB0, RB0;
+ vpshufb RTMP2, RB1, RB1;
+ vpshufb RTMP2, RB2, RB2;
+ vpshufb RTMP2, RB3, RB3;
+
+ FRAME_END
+ ret;
+SYM_FUNC_END(__sm4_crypt_blk8)
+
+/*
+ * void sm4_aesni_avx_crypt8(const u32 *rk, u8 *dst,
+ * const u8 *src, int nblocks)
+ */
+.align 8
+SYM_FUNC_START(sm4_aesni_avx_crypt8)
+ /* input:
+ * %rdi: round key array, CTX
+ * %rsi: dst (1..8 blocks)
+ * %rdx: src (1..8 blocks)
+ * %rcx: num blocks (1..8)
+ */
+ FRAME_BEGIN
+
+ cmpq $5, %rcx;
+ jb sm4_aesni_avx_crypt4;
+ vmovdqu (0 * 16)(%rdx), RA0;
+ vmovdqu (1 * 16)(%rdx), RA1;
+ vmovdqu (2 * 16)(%rdx), RA2;
+ vmovdqu (3 * 16)(%rdx), RA3;
+ vmovdqu (4 * 16)(%rdx), RB0;
+ vmovdqa RB0, RB1;
+ vmovdqa RB0, RB2;
+ vmovdqa RB0, RB3;
+ je .Lblk8_load_input_done;
+ vmovdqu (5 * 16)(%rdx), RB1;
+ cmpq $7, %rcx;
+ jb .Lblk8_load_input_done;
+ vmovdqu (6 * 16)(%rdx), RB2;
+ je .Lblk8_load_input_done;
+ vmovdqu (7 * 16)(%rdx), RB3;
+
+.Lblk8_load_input_done:
+ call __sm4_crypt_blk8;
+
+ cmpq $6, %rcx;
+ vmovdqu RA0, (0 * 16)(%rsi);
+ vmovdqu RA1, (1 * 16)(%rsi);
+ vmovdqu RA2, (2 * 16)(%rsi);
+ vmovdqu RA3, (3 * 16)(%rsi);
+ vmovdqu RB0, (4 * 16)(%rsi);
+ jb .Lblk8_store_output_done;
+ vmovdqu RB1, (5 * 16)(%rsi);
+ je .Lblk8_store_output_done;
+ vmovdqu RB2, (6 * 16)(%rsi);
+ cmpq $7, %rcx;
+ je .Lblk8_store_output_done;
+ vmovdqu RB3, (7 * 16)(%rsi);
+
+.Lblk8_store_output_done:
+ vzeroall;
+ FRAME_END
+ ret;
+SYM_FUNC_END(sm4_aesni_avx_crypt8)
+
+/*
+ * void sm4_aesni_avx_ctr_enc_blk8(const u32 *rk, u8 *dst,
+ * const u8 *src, u8 *iv)
+ */
+.align 8
+SYM_FUNC_START(sm4_aesni_avx_ctr_enc_blk8)
+ /* input:
+ * %rdi: round key array, CTX
+ * %rsi: dst (8 blocks)
+ * %rdx: src (8 blocks)
+ * %rcx: iv (big endian, 128bit)
+ */
+ FRAME_BEGIN
+
+ /* load IV and byteswap */
+ vmovdqu (%rcx), RA0;
+
+ vmovdqa .Lbswap128_mask rRIP, RBSWAP;
+ vpshufb RBSWAP, RA0, RTMP0; /* be => le */
+
+ vpcmpeqd RNOT, RNOT, RNOT;
+ vpsrldq $8, RNOT, RNOT; /* low: -1, high: 0 */
+
+#define inc_le128(x, minus_one, tmp) \
+ vpcmpeqq minus_one, x, tmp; \
+ vpsubq minus_one, x, x; \
+ vpslldq $8, tmp, tmp; \
+ vpsubq tmp, x, x;
+
+ /* construct IVs */
+ inc_le128(RTMP0, RNOT, RTMP2); /* +1 */
+ vpshufb RBSWAP, RTMP0, RA1;
+ inc_le128(RTMP0, RNOT, RTMP2); /* +2 */
+ vpshufb RBSWAP, RTMP0, RA2;
+ inc_le128(RTMP0, RNOT, RTMP2); /* +3 */
+ vpshufb RBSWAP, RTMP0, RA3;
+ inc_le128(RTMP0, RNOT, RTMP2); /* +4 */
+ vpshufb RBSWAP, RTMP0, RB0;
+ inc_le128(RTMP0, RNOT, RTMP2); /* +5 */
+ vpshufb RBSWAP, RTMP0, RB1;
+ inc_le128(RTMP0, RNOT, RTMP2); /* +6 */
+ vpshufb RBSWAP, RTMP0, RB2;
+ inc_le128(RTMP0, RNOT, RTMP2); /* +7 */
+ vpshufb RBSWAP, RTMP0, RB3;
+ inc_le128(RTMP0, RNOT, RTMP2); /* +8 */
+ vpshufb RBSWAP, RTMP0, RTMP1;
+
+ /* store new IV */
+ vmovdqu RTMP1, (%rcx);
+
+ call __sm4_crypt_blk8;
+
+ vpxor (0 * 16)(%rdx), RA0, RA0;
+ vpxor (1 * 16)(%rdx), RA1, RA1;
+ vpxor (2 * 16)(%rdx), RA2, RA2;
+ vpxor (3 * 16)(%rdx), RA3, RA3;
+ vpxor (4 * 16)(%rdx), RB0, RB0;
+ vpxor (5 * 16)(%rdx), RB1, RB1;
+ vpxor (6 * 16)(%rdx), RB2, RB2;
+ vpxor (7 * 16)(%rdx), RB3, RB3;
+
+ vmovdqu RA0, (0 * 16)(%rsi);
+ vmovdqu RA1, (1 * 16)(%rsi);
+ vmovdqu RA2, (2 * 16)(%rsi);
+ vmovdqu RA3, (3 * 16)(%rsi);
+ vmovdqu RB0, (4 * 16)(%rsi);
+ vmovdqu RB1, (5 * 16)(%rsi);
+ vmovdqu RB2, (6 * 16)(%rsi);
+ vmovdqu RB3, (7 * 16)(%rsi);
+
+ vzeroall;
+ FRAME_END
+ ret;
+SYM_FUNC_END(sm4_aesni_avx_ctr_enc_blk8)
+
+/*
+ * void sm4_aesni_avx_cbc_dec_blk8(const u32 *rk, u8 *dst,
+ * const u8 *src, u8 *iv)
+ */
+.align 8
+SYM_FUNC_START(sm4_aesni_avx_cbc_dec_blk8)
+ /* input:
+ * %rdi: round key array, CTX
+ * %rsi: dst (8 blocks)
+ * %rdx: src (8 blocks)
+ * %rcx: iv
+ */
+ FRAME_BEGIN
+
+ vmovdqu (0 * 16)(%rdx), RA0;
+ vmovdqu (1 * 16)(%rdx), RA1;
+ vmovdqu (2 * 16)(%rdx), RA2;
+ vmovdqu (3 * 16)(%rdx), RA3;
+ vmovdqu (4 * 16)(%rdx), RB0;
+ vmovdqu (5 * 16)(%rdx), RB1;
+ vmovdqu (6 * 16)(%rdx), RB2;
+ vmovdqu (7 * 16)(%rdx), RB3;
+
+ call __sm4_crypt_blk8;
+
+ vmovdqu (7 * 16)(%rdx), RNOT;
+ vpxor (%rcx), RA0, RA0;
+ vpxor (0 * 16)(%rdx), RA1, RA1;
+ vpxor (1 * 16)(%rdx), RA2, RA2;
+ vpxor (2 * 16)(%rdx), RA3, RA3;
+ vpxor (3 * 16)(%rdx), RB0, RB0;
+ vpxor (4 * 16)(%rdx), RB1, RB1;
+ vpxor (5 * 16)(%rdx), RB2, RB2;
+ vpxor (6 * 16)(%rdx), RB3, RB3;
+ vmovdqu RNOT, (%rcx); /* store new IV */
+
+ vmovdqu RA0, (0 * 16)(%rsi);
+ vmovdqu RA1, (1 * 16)(%rsi);
+ vmovdqu RA2, (2 * 16)(%rsi);
+ vmovdqu RA3, (3 * 16)(%rsi);
+ vmovdqu RB0, (4 * 16)(%rsi);
+ vmovdqu RB1, (5 * 16)(%rsi);
+ vmovdqu RB2, (6 * 16)(%rsi);
+ vmovdqu RB3, (7 * 16)(%rsi);
+
+ vzeroall;
+ FRAME_END
+ ret;
+SYM_FUNC_END(sm4_aesni_avx_cbc_dec_blk8)
+
+/*
+ * void sm4_aesni_avx_cfb_dec_blk8(const u32 *rk, u8 *dst,
+ * const u8 *src, u8 *iv)
+ */
+.align 8
+SYM_FUNC_START(sm4_aesni_avx_cfb_dec_blk8)
+ /* input:
+ * %rdi: round key array, CTX
+ * %rsi: dst (8 blocks)
+ * %rdx: src (8 blocks)
+ * %rcx: iv
+ */
+ FRAME_BEGIN
+
+ /* Load input */
+ vmovdqu (%rcx), RA0;
+ vmovdqu 0 * 16(%rdx), RA1;
+ vmovdqu 1 * 16(%rdx), RA2;
+ vmovdqu 2 * 16(%rdx), RA3;
+ vmovdqu 3 * 16(%rdx), RB0;
+ vmovdqu 4 * 16(%rdx), RB1;
+ vmovdqu 5 * 16(%rdx), RB2;
+ vmovdqu 6 * 16(%rdx), RB3;
+
+ /* Update IV */
+ vmovdqu 7 * 16(%rdx), RNOT;
+ vmovdqu RNOT, (%rcx);
+
+ call __sm4_crypt_blk8;
+
+ vpxor (0 * 16)(%rdx), RA0, RA0;
+ vpxor (1 * 16)(%rdx), RA1, RA1;
+ vpxor (2 * 16)(%rdx), RA2, RA2;
+ vpxor (3 * 16)(%rdx), RA3, RA3;
+ vpxor (4 * 16)(%rdx), RB0, RB0;
+ vpxor (5 * 16)(%rdx), RB1, RB1;
+ vpxor (6 * 16)(%rdx), RB2, RB2;
+ vpxor (7 * 16)(%rdx), RB3, RB3;
+
+ vmovdqu RA0, (0 * 16)(%rsi);
+ vmovdqu RA1, (1 * 16)(%rsi);
+ vmovdqu RA2, (2 * 16)(%rsi);
+ vmovdqu RA3, (3 * 16)(%rsi);
+ vmovdqu RB0, (4 * 16)(%rsi);
+ vmovdqu RB1, (5 * 16)(%rsi);
+ vmovdqu RB2, (6 * 16)(%rsi);
+ vmovdqu RB3, (7 * 16)(%rsi);
+
+ vzeroall;
+ FRAME_END
+ ret;
+SYM_FUNC_END(sm4_aesni_avx_cfb_dec_blk8)
diff --git a/arch/x86/crypto/sm4-aesni-avx2-asm_64.S b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S
new file mode 100644
index 000000000000..d2ffd7f76ee2
--- /dev/null
+++ b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S
@@ -0,0 +1,497 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SM4 Cipher Algorithm, AES-NI/AVX2 optimized.
+ * as specified in
+ * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
+ *
+ * Copyright (C) 2018 Markku-Juhani O. Saarinen <mjos@iki.fi>
+ * Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ * Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ */
+
+/* Based on SM4 AES-NI work by libgcrypt and Markku-Juhani O. Saarinen at:
+ * https://github.com/mjosaarinen/sm4ni
+ */
+
+#include <linux/linkage.h>
+#include <asm/frame.h>
+
+#define rRIP (%rip)
+
+/* vector registers */
+#define RX0 %ymm0
+#define RX1 %ymm1
+#define MASK_4BIT %ymm2
+#define RTMP0 %ymm3
+#define RTMP1 %ymm4
+#define RTMP2 %ymm5
+#define RTMP3 %ymm6
+#define RTMP4 %ymm7
+
+#define RA0 %ymm8
+#define RA1 %ymm9
+#define RA2 %ymm10
+#define RA3 %ymm11
+
+#define RB0 %ymm12
+#define RB1 %ymm13
+#define RB2 %ymm14
+#define RB3 %ymm15
+
+#define RNOT %ymm0
+#define RBSWAP %ymm1
+
+#define RX0x %xmm0
+#define RX1x %xmm1
+#define MASK_4BITx %xmm2
+
+#define RNOTx %xmm0
+#define RBSWAPx %xmm1
+
+#define RTMP0x %xmm3
+#define RTMP1x %xmm4
+#define RTMP2x %xmm5
+#define RTMP3x %xmm6
+#define RTMP4x %xmm7
+
+
+/* helper macros */
+
+/* Transpose four 32-bit words between 128-bit vector lanes. */
+#define transpose_4x4(x0, x1, x2, x3, t1, t2) \
+ vpunpckhdq x1, x0, t2; \
+ vpunpckldq x1, x0, x0; \
+ \
+ vpunpckldq x3, x2, t1; \
+ vpunpckhdq x3, x2, x2; \
+ \
+ vpunpckhqdq t1, x0, x1; \
+ vpunpcklqdq t1, x0, x0; \
+ \
+ vpunpckhqdq x2, t2, x3; \
+ vpunpcklqdq x2, t2, x2;
+
+/* post-SubByte transform. */
+#define transform_pre(x, lo_t, hi_t, mask4bit, tmp0) \
+ vpand x, mask4bit, tmp0; \
+ vpandn x, mask4bit, x; \
+ vpsrld $4, x, x; \
+ \
+ vpshufb tmp0, lo_t, tmp0; \
+ vpshufb x, hi_t, x; \
+ vpxor tmp0, x, x;
+
+/* post-SubByte transform. Note: x has been XOR'ed with mask4bit by
+ * 'vaeslastenc' instruction. */
+#define transform_post(x, lo_t, hi_t, mask4bit, tmp0) \
+ vpandn mask4bit, x, tmp0; \
+ vpsrld $4, x, x; \
+ vpand x, mask4bit, x; \
+ \
+ vpshufb tmp0, lo_t, tmp0; \
+ vpshufb x, hi_t, x; \
+ vpxor tmp0, x, x;
+
+
+.section .rodata.cst164, "aM", @progbits, 164
+.align 16
+
+/*
+ * Following four affine transform look-up tables are from work by
+ * Markku-Juhani O. Saarinen, at https://github.com/mjosaarinen/sm4ni
+ *
+ * These allow exposing SM4 S-Box from AES SubByte.
+ */
+
+/* pre-SubByte affine transform, from SM4 field to AES field. */
+.Lpre_tf_lo_s:
+ .quad 0x9197E2E474720701, 0xC7C1B4B222245157
+.Lpre_tf_hi_s:
+ .quad 0xE240AB09EB49A200, 0xF052B91BF95BB012
+
+/* post-SubByte affine transform, from AES field to SM4 field. */
+.Lpost_tf_lo_s:
+ .quad 0x5B67F2CEA19D0834, 0xEDD14478172BBE82
+.Lpost_tf_hi_s:
+ .quad 0xAE7201DD73AFDC00, 0x11CDBE62CC1063BF
+
+/* For isolating SubBytes from AESENCLAST, inverse shift row */
+.Linv_shift_row:
+ .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b
+ .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03
+
+/* Inverse shift row + Rotate left by 8 bits on 32-bit words with vpshufb */
+.Linv_shift_row_rol_8:
+ .byte 0x07, 0x00, 0x0d, 0x0a, 0x0b, 0x04, 0x01, 0x0e
+ .byte 0x0f, 0x08, 0x05, 0x02, 0x03, 0x0c, 0x09, 0x06
+
+/* Inverse shift row + Rotate left by 16 bits on 32-bit words with vpshufb */
+.Linv_shift_row_rol_16:
+ .byte 0x0a, 0x07, 0x00, 0x0d, 0x0e, 0x0b, 0x04, 0x01
+ .byte 0x02, 0x0f, 0x08, 0x05, 0x06, 0x03, 0x0c, 0x09
+
+/* Inverse shift row + Rotate left by 24 bits on 32-bit words with vpshufb */
+.Linv_shift_row_rol_24:
+ .byte 0x0d, 0x0a, 0x07, 0x00, 0x01, 0x0e, 0x0b, 0x04
+ .byte 0x05, 0x02, 0x0f, 0x08, 0x09, 0x06, 0x03, 0x0c
+
+/* For CTR-mode IV byteswap */
+.Lbswap128_mask:
+ .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+
+/* For input word byte-swap */
+.Lbswap32_mask:
+ .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
+
+.align 4
+/* 4-bit mask */
+.L0f0f0f0f:
+ .long 0x0f0f0f0f
+
+.text
+.align 16
+
+.align 8
+SYM_FUNC_START_LOCAL(__sm4_crypt_blk16)
+ /* input:
+ * %rdi: round key array, CTX
+ * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel
+ * plaintext blocks
+ * output:
+ * RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel
+ * ciphertext blocks
+ */
+ FRAME_BEGIN
+
+ vbroadcasti128 .Lbswap32_mask rRIP, RTMP2;
+ vpshufb RTMP2, RA0, RA0;
+ vpshufb RTMP2, RA1, RA1;
+ vpshufb RTMP2, RA2, RA2;
+ vpshufb RTMP2, RA3, RA3;
+ vpshufb RTMP2, RB0, RB0;
+ vpshufb RTMP2, RB1, RB1;
+ vpshufb RTMP2, RB2, RB2;
+ vpshufb RTMP2, RB3, RB3;
+
+ vpbroadcastd .L0f0f0f0f rRIP, MASK_4BIT;
+ transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
+ transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1);
+
+#define ROUND(round, s0, s1, s2, s3, r0, r1, r2, r3) \
+ vpbroadcastd (4*(round))(%rdi), RX0; \
+ vbroadcasti128 .Lpre_tf_lo_s rRIP, RTMP4; \
+ vbroadcasti128 .Lpre_tf_hi_s rRIP, RTMP1; \
+ vmovdqa RX0, RX1; \
+ vpxor s1, RX0, RX0; \
+ vpxor s2, RX0, RX0; \
+ vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \
+ vbroadcasti128 .Lpost_tf_lo_s rRIP, RTMP2; \
+ vbroadcasti128 .Lpost_tf_hi_s rRIP, RTMP3; \
+ vpxor r1, RX1, RX1; \
+ vpxor r2, RX1, RX1; \
+ vpxor r3, RX1, RX1; /* r1 ^ r2 ^ r3 ^ rk */ \
+ \
+ /* sbox, non-linear part */ \
+ transform_pre(RX0, RTMP4, RTMP1, MASK_4BIT, RTMP0); \
+ transform_pre(RX1, RTMP4, RTMP1, MASK_4BIT, RTMP0); \
+ vextracti128 $1, RX0, RTMP4x; \
+ vextracti128 $1, RX1, RTMP0x; \
+ vaesenclast MASK_4BITx, RX0x, RX0x; \
+ vaesenclast MASK_4BITx, RTMP4x, RTMP4x; \
+ vaesenclast MASK_4BITx, RX1x, RX1x; \
+ vaesenclast MASK_4BITx, RTMP0x, RTMP0x; \
+ vinserti128 $1, RTMP4x, RX0, RX0; \
+ vbroadcasti128 .Linv_shift_row rRIP, RTMP4; \
+ vinserti128 $1, RTMP0x, RX1, RX1; \
+ transform_post(RX0, RTMP2, RTMP3, MASK_4BIT, RTMP0); \
+ transform_post(RX1, RTMP2, RTMP3, MASK_4BIT, RTMP0); \
+ \
+ /* linear part */ \
+ vpshufb RTMP4, RX0, RTMP0; \
+ vpxor RTMP0, s0, s0; /* s0 ^ x */ \
+ vpshufb RTMP4, RX1, RTMP2; \
+ vbroadcasti128 .Linv_shift_row_rol_8 rRIP, RTMP4; \
+ vpxor RTMP2, r0, r0; /* r0 ^ x */ \
+ vpshufb RTMP4, RX0, RTMP1; \
+ vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */ \
+ vpshufb RTMP4, RX1, RTMP3; \
+ vbroadcasti128 .Linv_shift_row_rol_16 rRIP, RTMP4; \
+ vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) */ \
+ vpshufb RTMP4, RX0, RTMP1; \
+ vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \
+ vpshufb RTMP4, RX1, RTMP3; \
+ vbroadcasti128 .Linv_shift_row_rol_24 rRIP, RTMP4; \
+ vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) ^ rol(x,16) */ \
+ vpshufb RTMP4, RX0, RTMP1; \
+ vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */ \
+ vpslld $2, RTMP0, RTMP1; \
+ vpsrld $30, RTMP0, RTMP0; \
+ vpxor RTMP0, s0, s0; \
+ /* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \
+ vpxor RTMP1, s0, s0; \
+ vpshufb RTMP4, RX1, RTMP3; \
+ vpxor RTMP3, r0, r0; /* r0 ^ x ^ rol(x,24) */ \
+ vpslld $2, RTMP2, RTMP3; \
+ vpsrld $30, RTMP2, RTMP2; \
+ vpxor RTMP2, r0, r0; \
+ /* r0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ rol(x,24) */ \
+ vpxor RTMP3, r0, r0;
+
+ leaq (32*4)(%rdi), %rax;
+.align 16
+.Lroundloop_blk8:
+ ROUND(0, RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3);
+ ROUND(1, RA1, RA2, RA3, RA0, RB1, RB2, RB3, RB0);
+ ROUND(2, RA2, RA3, RA0, RA1, RB2, RB3, RB0, RB1);
+ ROUND(3, RA3, RA0, RA1, RA2, RB3, RB0, RB1, RB2);
+ leaq (4*4)(%rdi), %rdi;
+ cmpq %rax, %rdi;
+ jne .Lroundloop_blk8;
+
+#undef ROUND
+
+ vbroadcasti128 .Lbswap128_mask rRIP, RTMP2;
+
+ transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
+ transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1);
+ vpshufb RTMP2, RA0, RA0;
+ vpshufb RTMP2, RA1, RA1;
+ vpshufb RTMP2, RA2, RA2;
+ vpshufb RTMP2, RA3, RA3;
+ vpshufb RTMP2, RB0, RB0;
+ vpshufb RTMP2, RB1, RB1;
+ vpshufb RTMP2, RB2, RB2;
+ vpshufb RTMP2, RB3, RB3;
+
+ FRAME_END
+ ret;
+SYM_FUNC_END(__sm4_crypt_blk16)
+
+#define inc_le128(x, minus_one, tmp) \
+ vpcmpeqq minus_one, x, tmp; \
+ vpsubq minus_one, x, x; \
+ vpslldq $8, tmp, tmp; \
+ vpsubq tmp, x, x;
+
+/*
+ * void sm4_aesni_avx2_ctr_enc_blk16(const u32 *rk, u8 *dst,
+ * const u8 *src, u8 *iv)
+ */
+.align 8
+SYM_FUNC_START(sm4_aesni_avx2_ctr_enc_blk16)
+ /* input:
+ * %rdi: round key array, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ * %rcx: iv (big endian, 128bit)
+ */
+ FRAME_BEGIN
+
+ movq 8(%rcx), %rax;
+ bswapq %rax;
+
+ vzeroupper;
+
+ vbroadcasti128 .Lbswap128_mask rRIP, RTMP3;
+ vpcmpeqd RNOT, RNOT, RNOT;
+ vpsrldq $8, RNOT, RNOT; /* ab: -1:0 ; cd: -1:0 */
+ vpaddq RNOT, RNOT, RTMP2; /* ab: -2:0 ; cd: -2:0 */
+
+ /* load IV and byteswap */
+ vmovdqu (%rcx), RTMP4x;
+ vpshufb RTMP3x, RTMP4x, RTMP4x;
+ vmovdqa RTMP4x, RTMP0x;
+ inc_le128(RTMP4x, RNOTx, RTMP1x);
+ vinserti128 $1, RTMP4x, RTMP0, RTMP0;
+ vpshufb RTMP3, RTMP0, RA0; /* +1 ; +0 */
+
+ /* check need for handling 64-bit overflow and carry */
+ cmpq $(0xffffffffffffffff - 16), %rax;
+ ja .Lhandle_ctr_carry;
+
+ /* construct IVs */
+ vpsubq RTMP2, RTMP0, RTMP0; /* +3 ; +2 */
+ vpshufb RTMP3, RTMP0, RA1;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +5 ; +4 */
+ vpshufb RTMP3, RTMP0, RA2;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +7 ; +6 */
+ vpshufb RTMP3, RTMP0, RA3;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +9 ; +8 */
+ vpshufb RTMP3, RTMP0, RB0;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +11 ; +10 */
+ vpshufb RTMP3, RTMP0, RB1;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +13 ; +12 */
+ vpshufb RTMP3, RTMP0, RB2;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +15 ; +14 */
+ vpshufb RTMP3, RTMP0, RB3;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +16 */
+ vpshufb RTMP3x, RTMP0x, RTMP0x;
+
+ jmp .Lctr_carry_done;
+
+.Lhandle_ctr_carry:
+ /* construct IVs */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RA1; /* +3 ; +2 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RA2; /* +5 ; +4 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RA3; /* +7 ; +6 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RB0; /* +9 ; +8 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RB1; /* +11 ; +10 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RB2; /* +13 ; +12 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vpshufb RTMP3, RTMP0, RB3; /* +15 ; +14 */
+ inc_le128(RTMP0, RNOT, RTMP1);
+ vextracti128 $1, RTMP0, RTMP0x;
+ vpshufb RTMP3x, RTMP0x, RTMP0x; /* +16 */
+
+.align 4
+.Lctr_carry_done:
+ /* store new IV */
+ vmovdqu RTMP0x, (%rcx);
+
+ call __sm4_crypt_blk16;
+
+ vpxor (0 * 32)(%rdx), RA0, RA0;
+ vpxor (1 * 32)(%rdx), RA1, RA1;
+ vpxor (2 * 32)(%rdx), RA2, RA2;
+ vpxor (3 * 32)(%rdx), RA3, RA3;
+ vpxor (4 * 32)(%rdx), RB0, RB0;
+ vpxor (5 * 32)(%rdx), RB1, RB1;
+ vpxor (6 * 32)(%rdx), RB2, RB2;
+ vpxor (7 * 32)(%rdx), RB3, RB3;
+
+ vmovdqu RA0, (0 * 32)(%rsi);
+ vmovdqu RA1, (1 * 32)(%rsi);
+ vmovdqu RA2, (2 * 32)(%rsi);
+ vmovdqu RA3, (3 * 32)(%rsi);
+ vmovdqu RB0, (4 * 32)(%rsi);
+ vmovdqu RB1, (5 * 32)(%rsi);
+ vmovdqu RB2, (6 * 32)(%rsi);
+ vmovdqu RB3, (7 * 32)(%rsi);
+
+ vzeroall;
+ FRAME_END
+ ret;
+SYM_FUNC_END(sm4_aesni_avx2_ctr_enc_blk16)
+
+/*
+ * void sm4_aesni_avx2_cbc_dec_blk16(const u32 *rk, u8 *dst,
+ * const u8 *src, u8 *iv)
+ */
+.align 8
+SYM_FUNC_START(sm4_aesni_avx2_cbc_dec_blk16)
+ /* input:
+ * %rdi: round key array, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ * %rcx: iv
+ */
+ FRAME_BEGIN
+
+ vzeroupper;
+
+ vmovdqu (0 * 32)(%rdx), RA0;
+ vmovdqu (1 * 32)(%rdx), RA1;
+ vmovdqu (2 * 32)(%rdx), RA2;
+ vmovdqu (3 * 32)(%rdx), RA3;
+ vmovdqu (4 * 32)(%rdx), RB0;
+ vmovdqu (5 * 32)(%rdx), RB1;
+ vmovdqu (6 * 32)(%rdx), RB2;
+ vmovdqu (7 * 32)(%rdx), RB3;
+
+ call __sm4_crypt_blk16;
+
+ vmovdqu (%rcx), RNOTx;
+ vinserti128 $1, (%rdx), RNOT, RNOT;
+ vpxor RNOT, RA0, RA0;
+ vpxor (0 * 32 + 16)(%rdx), RA1, RA1;
+ vpxor (1 * 32 + 16)(%rdx), RA2, RA2;
+ vpxor (2 * 32 + 16)(%rdx), RA3, RA3;
+ vpxor (3 * 32 + 16)(%rdx), RB0, RB0;
+ vpxor (4 * 32 + 16)(%rdx), RB1, RB1;
+ vpxor (5 * 32 + 16)(%rdx), RB2, RB2;
+ vpxor (6 * 32 + 16)(%rdx), RB3, RB3;
+ vmovdqu (7 * 32 + 16)(%rdx), RNOTx;
+ vmovdqu RNOTx, (%rcx); /* store new IV */
+
+ vmovdqu RA0, (0 * 32)(%rsi);
+ vmovdqu RA1, (1 * 32)(%rsi);
+ vmovdqu RA2, (2 * 32)(%rsi);
+ vmovdqu RA3, (3 * 32)(%rsi);
+ vmovdqu RB0, (4 * 32)(%rsi);
+ vmovdqu RB1, (5 * 32)(%rsi);
+ vmovdqu RB2, (6 * 32)(%rsi);
+ vmovdqu RB3, (7 * 32)(%rsi);
+
+ vzeroall;
+ FRAME_END
+ ret;
+SYM_FUNC_END(sm4_aesni_avx2_cbc_dec_blk16)
+
+/*
+ * void sm4_aesni_avx2_cfb_dec_blk16(const u32 *rk, u8 *dst,
+ * const u8 *src, u8 *iv)
+ */
+.align 8
+SYM_FUNC_START(sm4_aesni_avx2_cfb_dec_blk16)
+ /* input:
+ * %rdi: round key array, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ * %rcx: iv
+ */
+ FRAME_BEGIN
+
+ vzeroupper;
+
+ /* Load input */
+ vmovdqu (%rcx), RNOTx;
+ vinserti128 $1, (%rdx), RNOT, RA0;
+ vmovdqu (0 * 32 + 16)(%rdx), RA1;
+ vmovdqu (1 * 32 + 16)(%rdx), RA2;
+ vmovdqu (2 * 32 + 16)(%rdx), RA3;
+ vmovdqu (3 * 32 + 16)(%rdx), RB0;
+ vmovdqu (4 * 32 + 16)(%rdx), RB1;
+ vmovdqu (5 * 32 + 16)(%rdx), RB2;
+ vmovdqu (6 * 32 + 16)(%rdx), RB3;
+
+ /* Update IV */
+ vmovdqu (7 * 32 + 16)(%rdx), RNOTx;
+ vmovdqu RNOTx, (%rcx);
+
+ call __sm4_crypt_blk16;
+
+ vpxor (0 * 32)(%rdx), RA0, RA0;
+ vpxor (1 * 32)(%rdx), RA1, RA1;
+ vpxor (2 * 32)(%rdx), RA2, RA2;
+ vpxor (3 * 32)(%rdx), RA3, RA3;
+ vpxor (4 * 32)(%rdx), RB0, RB0;
+ vpxor (5 * 32)(%rdx), RB1, RB1;
+ vpxor (6 * 32)(%rdx), RB2, RB2;
+ vpxor (7 * 32)(%rdx), RB3, RB3;
+
+ vmovdqu RA0, (0 * 32)(%rsi);
+ vmovdqu RA1, (1 * 32)(%rsi);
+ vmovdqu RA2, (2 * 32)(%rsi);
+ vmovdqu RA3, (3 * 32)(%rsi);
+ vmovdqu RB0, (4 * 32)(%rsi);
+ vmovdqu RB1, (5 * 32)(%rsi);
+ vmovdqu RB2, (6 * 32)(%rsi);
+ vmovdqu RB3, (7 * 32)(%rsi);
+
+ vzeroall;
+ FRAME_END
+ ret;
+SYM_FUNC_END(sm4_aesni_avx2_cfb_dec_blk16)
diff --git a/arch/x86/crypto/sm4-avx.h b/arch/x86/crypto/sm4-avx.h
new file mode 100644
index 000000000000..1bceab7516aa
--- /dev/null
+++ b/arch/x86/crypto/sm4-avx.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef ASM_X86_SM4_AVX_H
+#define ASM_X86_SM4_AVX_H
+
+#include <linux/types.h>
+#include <crypto/sm4.h>
+
+typedef void (*sm4_crypt_func)(const u32 *rk, u8 *dst, const u8 *src, u8 *iv);
+
+int sm4_avx_ecb_encrypt(struct skcipher_request *req);
+int sm4_avx_ecb_decrypt(struct skcipher_request *req);
+
+int sm4_cbc_encrypt(struct skcipher_request *req);
+int sm4_avx_cbc_decrypt(struct skcipher_request *req,
+ unsigned int bsize, sm4_crypt_func func);
+
+int sm4_cfb_encrypt(struct skcipher_request *req);
+int sm4_avx_cfb_decrypt(struct skcipher_request *req,
+ unsigned int bsize, sm4_crypt_func func);
+
+int sm4_avx_ctr_crypt(struct skcipher_request *req,
+ unsigned int bsize, sm4_crypt_func func);
+
+#endif
diff --git a/arch/x86/crypto/sm4_aesni_avx2_glue.c b/arch/x86/crypto/sm4_aesni_avx2_glue.c
new file mode 100644
index 000000000000..84bc718f49a3
--- /dev/null
+++ b/arch/x86/crypto/sm4_aesni_avx2_glue.c
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SM4 Cipher Algorithm, AES-NI/AVX2 optimized.
+ * as specified in
+ * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
+ *
+ * Copyright (c) 2021, Alibaba Group.
+ * Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ */
+
+#include <linux/module.h>
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <asm/simd.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/sm4.h>
+#include "sm4-avx.h"
+
+#define SM4_CRYPT16_BLOCK_SIZE (SM4_BLOCK_SIZE * 16)
+
+asmlinkage void sm4_aesni_avx2_ctr_enc_blk16(const u32 *rk, u8 *dst,
+ const u8 *src, u8 *iv);
+asmlinkage void sm4_aesni_avx2_cbc_dec_blk16(const u32 *rk, u8 *dst,
+ const u8 *src, u8 *iv);
+asmlinkage void sm4_aesni_avx2_cfb_dec_blk16(const u32 *rk, u8 *dst,
+ const u8 *src, u8 *iv);
+
+static int sm4_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int key_len)
+{
+ struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ return sm4_expandkey(ctx, key, key_len);
+}
+
+static int cbc_decrypt(struct skcipher_request *req)
+{
+ return sm4_avx_cbc_decrypt(req, SM4_CRYPT16_BLOCK_SIZE,
+ sm4_aesni_avx2_cbc_dec_blk16);
+}
+
+
+static int cfb_decrypt(struct skcipher_request *req)
+{
+ return sm4_avx_cfb_decrypt(req, SM4_CRYPT16_BLOCK_SIZE,
+ sm4_aesni_avx2_cfb_dec_blk16);
+}
+
+static int ctr_crypt(struct skcipher_request *req)
+{
+ return sm4_avx_ctr_crypt(req, SM4_CRYPT16_BLOCK_SIZE,
+ sm4_aesni_avx2_ctr_enc_blk16);
+}
+
+static struct skcipher_alg sm4_aesni_avx2_skciphers[] = {
+ {
+ .base = {
+ .cra_name = "__ecb(sm4)",
+ .cra_driver_name = "__ecb-sm4-aesni-avx2",
+ .cra_priority = 500,
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = SM4_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct sm4_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = SM4_KEY_SIZE,
+ .max_keysize = SM4_KEY_SIZE,
+ .walksize = 16 * SM4_BLOCK_SIZE,
+ .setkey = sm4_skcipher_setkey,
+ .encrypt = sm4_avx_ecb_encrypt,
+ .decrypt = sm4_avx_ecb_decrypt,
+ }, {
+ .base = {
+ .cra_name = "__cbc(sm4)",
+ .cra_driver_name = "__cbc-sm4-aesni-avx2",
+ .cra_priority = 500,
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = SM4_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct sm4_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = SM4_KEY_SIZE,
+ .max_keysize = SM4_KEY_SIZE,
+ .ivsize = SM4_BLOCK_SIZE,
+ .walksize = 16 * SM4_BLOCK_SIZE,
+ .setkey = sm4_skcipher_setkey,
+ .encrypt = sm4_cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ }, {
+ .base = {
+ .cra_name = "__cfb(sm4)",
+ .cra_driver_name = "__cfb-sm4-aesni-avx2",
+ .cra_priority = 500,
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct sm4_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = SM4_KEY_SIZE,
+ .max_keysize = SM4_KEY_SIZE,
+ .ivsize = SM4_BLOCK_SIZE,
+ .chunksize = SM4_BLOCK_SIZE,
+ .walksize = 16 * SM4_BLOCK_SIZE,
+ .setkey = sm4_skcipher_setkey,
+ .encrypt = sm4_cfb_encrypt,
+ .decrypt = cfb_decrypt,
+ }, {
+ .base = {
+ .cra_name = "__ctr(sm4)",
+ .cra_driver_name = "__ctr-sm4-aesni-avx2",
+ .cra_priority = 500,
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct sm4_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = SM4_KEY_SIZE,
+ .max_keysize = SM4_KEY_SIZE,
+ .ivsize = SM4_BLOCK_SIZE,
+ .chunksize = SM4_BLOCK_SIZE,
+ .walksize = 16 * SM4_BLOCK_SIZE,
+ .setkey = sm4_skcipher_setkey,
+ .encrypt = ctr_crypt,
+ .decrypt = ctr_crypt,
+ }
+};
+
+static struct simd_skcipher_alg *
+simd_sm4_aesni_avx2_skciphers[ARRAY_SIZE(sm4_aesni_avx2_skciphers)];
+
+static int __init sm4_init(void)
+{
+ const char *feature_name;
+
+ if (!boot_cpu_has(X86_FEATURE_AVX) ||
+ !boot_cpu_has(X86_FEATURE_AVX2) ||
+ !boot_cpu_has(X86_FEATURE_AES) ||
+ !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
+ pr_info("AVX2 or AES-NI instructions are not detected.\n");
+ return -ENODEV;
+ }
+
+ if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
+ &feature_name)) {
+ pr_info("CPU feature '%s' is not supported.\n", feature_name);
+ return -ENODEV;
+ }
+
+ return simd_register_skciphers_compat(sm4_aesni_avx2_skciphers,
+ ARRAY_SIZE(sm4_aesni_avx2_skciphers),
+ simd_sm4_aesni_avx2_skciphers);
+}
+
+static void __exit sm4_exit(void)
+{
+ simd_unregister_skciphers(sm4_aesni_avx2_skciphers,
+ ARRAY_SIZE(sm4_aesni_avx2_skciphers),
+ simd_sm4_aesni_avx2_skciphers);
+}
+
+module_init(sm4_init);
+module_exit(sm4_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
+MODULE_DESCRIPTION("SM4 Cipher Algorithm, AES-NI/AVX2 optimized");
+MODULE_ALIAS_CRYPTO("sm4");
+MODULE_ALIAS_CRYPTO("sm4-aesni-avx2");
diff --git a/arch/x86/crypto/sm4_aesni_avx_glue.c b/arch/x86/crypto/sm4_aesni_avx_glue.c
new file mode 100644
index 000000000000..7800f77d68ad
--- /dev/null
+++ b/arch/x86/crypto/sm4_aesni_avx_glue.c
@@ -0,0 +1,487 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SM4 Cipher Algorithm, AES-NI/AVX optimized.
+ * as specified in
+ * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
+ *
+ * Copyright (c) 2021, Alibaba Group.
+ * Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ */
+
+#include <linux/module.h>
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <asm/simd.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/sm4.h>
+#include "sm4-avx.h"
+
+#define SM4_CRYPT8_BLOCK_SIZE (SM4_BLOCK_SIZE * 8)
+
+asmlinkage void sm4_aesni_avx_crypt4(const u32 *rk, u8 *dst,
+ const u8 *src, int nblocks);
+asmlinkage void sm4_aesni_avx_crypt8(const u32 *rk, u8 *dst,
+ const u8 *src, int nblocks);
+asmlinkage void sm4_aesni_avx_ctr_enc_blk8(const u32 *rk, u8 *dst,
+ const u8 *src, u8 *iv);
+asmlinkage void sm4_aesni_avx_cbc_dec_blk8(const u32 *rk, u8 *dst,
+ const u8 *src, u8 *iv);
+asmlinkage void sm4_aesni_avx_cfb_dec_blk8(const u32 *rk, u8 *dst,
+ const u8 *src, u8 *iv);
+
+static int sm4_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int key_len)
+{
+ struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ return sm4_expandkey(ctx, key, key_len);
+}
+
+static int ecb_do_crypt(struct skcipher_request *req, const u32 *rkey)
+{
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ while ((nbytes = walk.nbytes) > 0) {
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+
+ kernel_fpu_begin();
+ while (nbytes >= SM4_CRYPT8_BLOCK_SIZE) {
+ sm4_aesni_avx_crypt8(rkey, dst, src, 8);
+ dst += SM4_CRYPT8_BLOCK_SIZE;
+ src += SM4_CRYPT8_BLOCK_SIZE;
+ nbytes -= SM4_CRYPT8_BLOCK_SIZE;
+ }
+ while (nbytes >= SM4_BLOCK_SIZE) {
+ unsigned int nblocks = min(nbytes >> 4, 4u);
+ sm4_aesni_avx_crypt4(rkey, dst, src, nblocks);
+ dst += nblocks * SM4_BLOCK_SIZE;
+ src += nblocks * SM4_BLOCK_SIZE;
+ nbytes -= nblocks * SM4_BLOCK_SIZE;
+ }
+ kernel_fpu_end();
+
+ err = skcipher_walk_done(&walk, nbytes);
+ }
+
+ return err;
+}
+
+int sm4_avx_ecb_encrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ return ecb_do_crypt(req, ctx->rkey_enc);
+}
+EXPORT_SYMBOL_GPL(sm4_avx_ecb_encrypt);
+
+int sm4_avx_ecb_decrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ return ecb_do_crypt(req, ctx->rkey_dec);
+}
+EXPORT_SYMBOL_GPL(sm4_avx_ecb_decrypt);
+
+int sm4_cbc_encrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ while ((nbytes = walk.nbytes) > 0) {
+ const u8 *iv = walk.iv;
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+
+ while (nbytes >= SM4_BLOCK_SIZE) {
+ crypto_xor_cpy(dst, src, iv, SM4_BLOCK_SIZE);
+ sm4_crypt_block(ctx->rkey_enc, dst, dst);
+ iv = dst;
+ src += SM4_BLOCK_SIZE;
+ dst += SM4_BLOCK_SIZE;
+ nbytes -= SM4_BLOCK_SIZE;
+ }
+ if (iv != walk.iv)
+ memcpy(walk.iv, iv, SM4_BLOCK_SIZE);
+
+ err = skcipher_walk_done(&walk, nbytes);
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(sm4_cbc_encrypt);
+
+int sm4_avx_cbc_decrypt(struct skcipher_request *req,
+ unsigned int bsize, sm4_crypt_func func)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ while ((nbytes = walk.nbytes) > 0) {
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+
+ kernel_fpu_begin();
+
+ while (nbytes >= bsize) {
+ func(ctx->rkey_dec, dst, src, walk.iv);
+ dst += bsize;
+ src += bsize;
+ nbytes -= bsize;
+ }
+
+ while (nbytes >= SM4_BLOCK_SIZE) {
+ u8 keystream[SM4_BLOCK_SIZE * 8];
+ u8 iv[SM4_BLOCK_SIZE];
+ unsigned int nblocks = min(nbytes >> 4, 8u);
+ int i;
+
+ sm4_aesni_avx_crypt8(ctx->rkey_dec, keystream,
+ src, nblocks);
+
+ src += ((int)nblocks - 2) * SM4_BLOCK_SIZE;
+ dst += (nblocks - 1) * SM4_BLOCK_SIZE;
+ memcpy(iv, src + SM4_BLOCK_SIZE, SM4_BLOCK_SIZE);
+
+ for (i = nblocks - 1; i > 0; i--) {
+ crypto_xor_cpy(dst, src,
+ &keystream[i * SM4_BLOCK_SIZE],
+ SM4_BLOCK_SIZE);
+ src -= SM4_BLOCK_SIZE;
+ dst -= SM4_BLOCK_SIZE;
+ }
+ crypto_xor_cpy(dst, walk.iv, keystream, SM4_BLOCK_SIZE);
+ memcpy(walk.iv, iv, SM4_BLOCK_SIZE);
+ dst += nblocks * SM4_BLOCK_SIZE;
+ src += (nblocks + 1) * SM4_BLOCK_SIZE;
+ nbytes -= nblocks * SM4_BLOCK_SIZE;
+ }
+
+ kernel_fpu_end();
+ err = skcipher_walk_done(&walk, nbytes);
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(sm4_avx_cbc_decrypt);
+
+static int cbc_decrypt(struct skcipher_request *req)
+{
+ return sm4_avx_cbc_decrypt(req, SM4_CRYPT8_BLOCK_SIZE,
+ sm4_aesni_avx_cbc_dec_blk8);
+}
+
+int sm4_cfb_encrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ while ((nbytes = walk.nbytes) > 0) {
+ u8 keystream[SM4_BLOCK_SIZE];
+ const u8 *iv = walk.iv;
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+
+ while (nbytes >= SM4_BLOCK_SIZE) {
+ sm4_crypt_block(ctx->rkey_enc, keystream, iv);
+ crypto_xor_cpy(dst, src, keystream, SM4_BLOCK_SIZE);
+ iv = dst;
+ src += SM4_BLOCK_SIZE;
+ dst += SM4_BLOCK_SIZE;
+ nbytes -= SM4_BLOCK_SIZE;
+ }
+ if (iv != walk.iv)
+ memcpy(walk.iv, iv, SM4_BLOCK_SIZE);
+
+ /* tail */
+ if (walk.nbytes == walk.total && nbytes > 0) {
+ sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv);
+ crypto_xor_cpy(dst, src, keystream, nbytes);
+ nbytes = 0;
+ }
+
+ err = skcipher_walk_done(&walk, nbytes);
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(sm4_cfb_encrypt);
+
+int sm4_avx_cfb_decrypt(struct skcipher_request *req,
+ unsigned int bsize, sm4_crypt_func func)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ while ((nbytes = walk.nbytes) > 0) {
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+
+ kernel_fpu_begin();
+
+ while (nbytes >= bsize) {
+ func(ctx->rkey_enc, dst, src, walk.iv);
+ dst += bsize;
+ src += bsize;
+ nbytes -= bsize;
+ }
+
+ while (nbytes >= SM4_BLOCK_SIZE) {
+ u8 keystream[SM4_BLOCK_SIZE * 8];
+ unsigned int nblocks = min(nbytes >> 4, 8u);
+
+ memcpy(keystream, walk.iv, SM4_BLOCK_SIZE);
+ if (nblocks > 1)
+ memcpy(&keystream[SM4_BLOCK_SIZE], src,
+ (nblocks - 1) * SM4_BLOCK_SIZE);
+ memcpy(walk.iv, src + (nblocks - 1) * SM4_BLOCK_SIZE,
+ SM4_BLOCK_SIZE);
+
+ sm4_aesni_avx_crypt8(ctx->rkey_enc, keystream,
+ keystream, nblocks);
+
+ crypto_xor_cpy(dst, src, keystream,
+ nblocks * SM4_BLOCK_SIZE);
+ dst += nblocks * SM4_BLOCK_SIZE;
+ src += nblocks * SM4_BLOCK_SIZE;
+ nbytes -= nblocks * SM4_BLOCK_SIZE;
+ }
+
+ kernel_fpu_end();
+
+ /* tail */
+ if (walk.nbytes == walk.total && nbytes > 0) {
+ u8 keystream[SM4_BLOCK_SIZE];
+
+ sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv);
+ crypto_xor_cpy(dst, src, keystream, nbytes);
+ nbytes = 0;
+ }
+
+ err = skcipher_walk_done(&walk, nbytes);
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(sm4_avx_cfb_decrypt);
+
+static int cfb_decrypt(struct skcipher_request *req)
+{
+ return sm4_avx_cfb_decrypt(req, SM4_CRYPT8_BLOCK_SIZE,
+ sm4_aesni_avx_cfb_dec_blk8);
+}
+
+int sm4_avx_ctr_crypt(struct skcipher_request *req,
+ unsigned int bsize, sm4_crypt_func func)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ while ((nbytes = walk.nbytes) > 0) {
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+
+ kernel_fpu_begin();
+
+ while (nbytes >= bsize) {
+ func(ctx->rkey_enc, dst, src, walk.iv);
+ dst += bsize;
+ src += bsize;
+ nbytes -= bsize;
+ }
+
+ while (nbytes >= SM4_BLOCK_SIZE) {
+ u8 keystream[SM4_BLOCK_SIZE * 8];
+ unsigned int nblocks = min(nbytes >> 4, 8u);
+ int i;
+
+ for (i = 0; i < nblocks; i++) {
+ memcpy(&keystream[i * SM4_BLOCK_SIZE],
+ walk.iv, SM4_BLOCK_SIZE);
+ crypto_inc(walk.iv, SM4_BLOCK_SIZE);
+ }
+ sm4_aesni_avx_crypt8(ctx->rkey_enc, keystream,
+ keystream, nblocks);
+
+ crypto_xor_cpy(dst, src, keystream,
+ nblocks * SM4_BLOCK_SIZE);
+ dst += nblocks * SM4_BLOCK_SIZE;
+ src += nblocks * SM4_BLOCK_SIZE;
+ nbytes -= nblocks * SM4_BLOCK_SIZE;
+ }
+
+ kernel_fpu_end();
+
+ /* tail */
+ if (walk.nbytes == walk.total && nbytes > 0) {
+ u8 keystream[SM4_BLOCK_SIZE];
+
+ memcpy(keystream, walk.iv, SM4_BLOCK_SIZE);
+ crypto_inc(walk.iv, SM4_BLOCK_SIZE);
+
+ sm4_crypt_block(ctx->rkey_enc, keystream, keystream);
+
+ crypto_xor_cpy(dst, src, keystream, nbytes);
+ dst += nbytes;
+ src += nbytes;
+ nbytes = 0;
+ }
+
+ err = skcipher_walk_done(&walk, nbytes);
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(sm4_avx_ctr_crypt);
+
+static int ctr_crypt(struct skcipher_request *req)
+{
+ return sm4_avx_ctr_crypt(req, SM4_CRYPT8_BLOCK_SIZE,
+ sm4_aesni_avx_ctr_enc_blk8);
+}
+
+static struct skcipher_alg sm4_aesni_avx_skciphers[] = {
+ {
+ .base = {
+ .cra_name = "__ecb(sm4)",
+ .cra_driver_name = "__ecb-sm4-aesni-avx",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = SM4_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct sm4_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = SM4_KEY_SIZE,
+ .max_keysize = SM4_KEY_SIZE,
+ .walksize = 8 * SM4_BLOCK_SIZE,
+ .setkey = sm4_skcipher_setkey,
+ .encrypt = sm4_avx_ecb_encrypt,
+ .decrypt = sm4_avx_ecb_decrypt,
+ }, {
+ .base = {
+ .cra_name = "__cbc(sm4)",
+ .cra_driver_name = "__cbc-sm4-aesni-avx",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = SM4_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct sm4_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = SM4_KEY_SIZE,
+ .max_keysize = SM4_KEY_SIZE,
+ .ivsize = SM4_BLOCK_SIZE,
+ .walksize = 8 * SM4_BLOCK_SIZE,
+ .setkey = sm4_skcipher_setkey,
+ .encrypt = sm4_cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ }, {
+ .base = {
+ .cra_name = "__cfb(sm4)",
+ .cra_driver_name = "__cfb-sm4-aesni-avx",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct sm4_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = SM4_KEY_SIZE,
+ .max_keysize = SM4_KEY_SIZE,
+ .ivsize = SM4_BLOCK_SIZE,
+ .chunksize = SM4_BLOCK_SIZE,
+ .walksize = 8 * SM4_BLOCK_SIZE,
+ .setkey = sm4_skcipher_setkey,
+ .encrypt = sm4_cfb_encrypt,
+ .decrypt = cfb_decrypt,
+ }, {
+ .base = {
+ .cra_name = "__ctr(sm4)",
+ .cra_driver_name = "__ctr-sm4-aesni-avx",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct sm4_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = SM4_KEY_SIZE,
+ .max_keysize = SM4_KEY_SIZE,
+ .ivsize = SM4_BLOCK_SIZE,
+ .chunksize = SM4_BLOCK_SIZE,
+ .walksize = 8 * SM4_BLOCK_SIZE,
+ .setkey = sm4_skcipher_setkey,
+ .encrypt = ctr_crypt,
+ .decrypt = ctr_crypt,
+ }
+};
+
+static struct simd_skcipher_alg *
+simd_sm4_aesni_avx_skciphers[ARRAY_SIZE(sm4_aesni_avx_skciphers)];
+
+static int __init sm4_init(void)
+{
+ const char *feature_name;
+
+ if (!boot_cpu_has(X86_FEATURE_AVX) ||
+ !boot_cpu_has(X86_FEATURE_AES) ||
+ !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
+ pr_info("AVX or AES-NI instructions are not detected.\n");
+ return -ENODEV;
+ }
+
+ if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
+ &feature_name)) {
+ pr_info("CPU feature '%s' is not supported.\n", feature_name);
+ return -ENODEV;
+ }
+
+ return simd_register_skciphers_compat(sm4_aesni_avx_skciphers,
+ ARRAY_SIZE(sm4_aesni_avx_skciphers),
+ simd_sm4_aesni_avx_skciphers);
+}
+
+static void __exit sm4_exit(void)
+{
+ simd_unregister_skciphers(sm4_aesni_avx_skciphers,
+ ARRAY_SIZE(sm4_aesni_avx_skciphers),
+ simd_sm4_aesni_avx_skciphers);
+}
+
+module_init(sm4_init);
+module_exit(sm4_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
+MODULE_DESCRIPTION("SM4 Cipher Algorithm, AES-NI/AVX optimized");
+MODULE_ALIAS_CRYPTO("sm4");
+MODULE_ALIAS_CRYPTO("sm4-aesni-avx");
diff --git a/arch/x86/events/Kconfig b/arch/x86/events/Kconfig
index 39d9ded9e25a..d6cdfe631674 100644
--- a/arch/x86/events/Kconfig
+++ b/arch/x86/events/Kconfig
@@ -34,4 +34,14 @@ config PERF_EVENTS_AMD_POWER
(CPUID Fn8000_0007_EDX[12]) interface to calculate the
average power consumption on Family 15h processors.
+config PERF_EVENTS_AMD_UNCORE
+ tristate "AMD Uncore performance events"
+ depends on PERF_EVENTS && CPU_SUP_AMD
+ default y
+ help
+ Include support for AMD uncore performance events for use with
+ e.g., perf stat -e amd_l3/.../,amd_df/.../.
+
+ To compile this driver as a module, choose M here: the
+ module will be called 'amd-uncore'.
endmenu
diff --git a/arch/x86/events/amd/Makefile b/arch/x86/events/amd/Makefile
index fe8795a67385..6cbe38d5fd9d 100644
--- a/arch/x86/events/amd/Makefile
+++ b/arch/x86/events/amd/Makefile
@@ -1,8 +1,9 @@
# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_CPU_SUP_AMD) += core.o uncore.o
+obj-$(CONFIG_CPU_SUP_AMD) += core.o
obj-$(CONFIG_PERF_EVENTS_AMD_POWER) += power.o
obj-$(CONFIG_X86_LOCAL_APIC) += ibs.o
+obj-$(CONFIG_PERF_EVENTS_AMD_UNCORE) += amd-uncore.o
+amd-uncore-objs := uncore.o
ifdef CONFIG_AMD_IOMMU
obj-$(CONFIG_CPU_SUP_AMD) += iommu.o
endif
-
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 40669eac9d6d..9739019d4b67 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -26,6 +26,7 @@ static u32 ibs_caps;
#include <linux/hardirq.h>
#include <asm/nmi.h>
+#include <asm/amd-ibs.h>
#define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
#define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT
@@ -90,6 +91,7 @@ struct perf_ibs {
unsigned long offset_mask[1];
int offset_max;
unsigned int fetch_count_reset_broken : 1;
+ unsigned int fetch_ignore_if_zero_rip : 1;
struct cpu_perf_ibs __percpu *pcpu;
struct attribute **format_attrs;
@@ -99,15 +101,6 @@ struct perf_ibs {
u64 (*get_count)(u64 config);
};
-struct perf_ibs_data {
- u32 size;
- union {
- u32 data[0]; /* data buffer starts here */
- u32 caps;
- };
- u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX];
-};
-
static int
perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period)
{
@@ -328,11 +321,14 @@ static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
static u64 get_ibs_fetch_count(u64 config)
{
- return (config & IBS_FETCH_CNT) >> 12;
+ union ibs_fetch_ctl fetch_ctl = (union ibs_fetch_ctl)config;
+
+ return fetch_ctl.fetch_cnt << 4;
}
static u64 get_ibs_op_count(u64 config)
{
+ union ibs_op_ctl op_ctl = (union ibs_op_ctl)config;
u64 count = 0;
/*
@@ -340,12 +336,12 @@ static u64 get_ibs_op_count(u64 config)
* and the lower 7 bits of CurCnt are randomized.
* Otherwise CurCnt has the full 27-bit current counter value.
*/
- if (config & IBS_OP_VAL) {
- count = (config & IBS_OP_MAX_CNT) << 4;
+ if (op_ctl.op_val) {
+ count = op_ctl.opmaxcnt << 4;
if (ibs_caps & IBS_CAPS_OPCNTEXT)
- count += config & IBS_OP_MAX_CNT_EXT_MASK;
+ count += op_ctl.opmaxcnt_ext << 20;
} else if (ibs_caps & IBS_CAPS_RDWROPCNT) {
- count = (config & IBS_OP_CUR_CNT) >> 32;
+ count = op_ctl.opcurcnt;
}
return count;
@@ -570,6 +566,7 @@ static struct perf_ibs perf_ibs_op = {
.start = perf_ibs_start,
.stop = perf_ibs_stop,
.read = perf_ibs_read,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
},
.msr = MSR_AMD64_IBSOPCTL,
.config_mask = IBS_OP_CONFIG_MASK,
@@ -672,6 +669,10 @@ fail:
if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
regs.flags &= ~PERF_EFLAGS_EXACT;
} else {
+ /* Workaround for erratum #1197 */
+ if (perf_ibs->fetch_ignore_if_zero_rip && !(ibs_data.regs[1]))
+ goto out;
+
set_linear_ip(&regs, ibs_data.regs[1]);
regs.flags |= PERF_EFLAGS_EXACT;
}
@@ -769,6 +770,9 @@ static __init void perf_event_ibs_init(void)
if (boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18)
perf_ibs_fetch.fetch_count_reset_broken = 1;
+ if (boot_cpu_data.x86 == 0x19 && boot_cpu_data.x86_model < 0x10)
+ perf_ibs_fetch.fetch_ignore_if_zero_rip = 1;
+
perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
if (ibs_caps & IBS_CAPS_OPCNT) {
diff --git a/arch/x86/events/amd/power.c b/arch/x86/events/amd/power.c
index 16a2369c586e..37d5b380516e 100644
--- a/arch/x86/events/amd/power.c
+++ b/arch/x86/events/amd/power.c
@@ -213,6 +213,7 @@ static struct pmu pmu_class = {
.stop = pmu_event_stop,
.read = pmu_event_read,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+ .module = THIS_MODULE,
};
static int power_cpu_exit(unsigned int cpu)
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index 582c0ffb5e98..0d04414b97d2 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -12,11 +12,11 @@
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
+#include <linux/cpufeature.h>
+#include <linux/smp.h>
-#include <asm/cpufeature.h>
#include <asm/perf_event.h>
#include <asm/msr.h>
-#include <asm/smp.h>
#define NUM_COUNTERS_NB 4
#define NUM_COUNTERS_L2 4
@@ -347,6 +347,7 @@ static struct pmu amd_nb_pmu = {
.stop = amd_uncore_stop,
.read = amd_uncore_read,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
+ .module = THIS_MODULE,
};
static struct pmu amd_llc_pmu = {
@@ -360,6 +361,7 @@ static struct pmu amd_llc_pmu = {
.stop = amd_uncore_stop,
.read = amd_uncore_read,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
+ .module = THIS_MODULE,
};
static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
@@ -452,7 +454,7 @@ static int amd_uncore_cpu_starting(unsigned int cpu)
if (amd_uncore_llc) {
uncore = *per_cpu_ptr(amd_uncore_llc, cpu);
- uncore->id = per_cpu(cpu_llc_id, cpu);
+ uncore->id = get_llc_id(cpu);
uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_llc);
*per_cpu_ptr(amd_uncore_llc, cpu) = uncore;
@@ -659,12 +661,34 @@ fail_prep:
fail_llc:
if (boot_cpu_has(X86_FEATURE_PERFCTR_NB))
perf_pmu_unregister(&amd_nb_pmu);
- if (amd_uncore_llc)
- free_percpu(amd_uncore_llc);
+ free_percpu(amd_uncore_llc);
fail_nb:
- if (amd_uncore_nb)
- free_percpu(amd_uncore_nb);
+ free_percpu(amd_uncore_nb);
return ret;
}
-device_initcall(amd_uncore_init);
+
+static void __exit amd_uncore_exit(void)
+{
+ cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE);
+ cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING);
+ cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP);
+
+ if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) {
+ perf_pmu_unregister(&amd_llc_pmu);
+ free_percpu(amd_uncore_llc);
+ amd_uncore_llc = NULL;
+ }
+
+ if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
+ perf_pmu_unregister(&amd_nb_pmu);
+ free_percpu(amd_uncore_nb);
+ amd_uncore_nb = NULL;
+ }
+}
+
+module_init(amd_uncore_init);
+module_exit(amd_uncore_exit);
+
+MODULE_DESCRIPTION("AMD Uncore Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 1eb45139fcc6..2a57dbed4894 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1087,10 +1087,8 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
* validate an event group (assign == NULL)
*/
if (!unsched && assign) {
- for (i = 0; i < n; i++) {
- e = cpuc->event_list[i];
+ for (i = 0; i < n; i++)
static_call_cond(x86_pmu_commit_scheduling)(cpuc, i, assign[i]);
- }
} else {
for (i = n0; i < n; i++) {
e = cpuc->event_list[i];
@@ -2489,13 +2487,15 @@ void perf_clear_dirty_counters(void)
return;
for_each_set_bit(i, cpuc->dirty, X86_PMC_IDX_MAX) {
- /* Metrics and fake events don't have corresponding HW counters. */
- if (is_metric_idx(i) || (i == INTEL_PMC_IDX_FIXED_VLBR))
- continue;
- else if (i >= INTEL_PMC_IDX_FIXED)
+ if (i >= INTEL_PMC_IDX_FIXED) {
+ /* Metrics and fake events don't have corresponding HW counters. */
+ if ((i - INTEL_PMC_IDX_FIXED) >= hybrid(cpuc->pmu, num_counters_fixed))
+ continue;
+
wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + (i - INTEL_PMC_IDX_FIXED), 0);
- else
+ } else {
wrmsrl(x86_pmu_event_addr(i), 0);
+ }
}
bitmap_zero(cpuc->dirty, X86_PMC_IDX_MAX);
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index fca7a6e2242f..7011e87be6d0 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2904,24 +2904,28 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
*/
static int intel_pmu_handle_irq(struct pt_regs *regs)
{
- struct cpu_hw_events *cpuc;
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ bool late_ack = hybrid_bit(cpuc->pmu, late_ack);
+ bool mid_ack = hybrid_bit(cpuc->pmu, mid_ack);
int loops;
u64 status;
int handled;
int pmu_enabled;
- cpuc = this_cpu_ptr(&cpu_hw_events);
-
/*
* Save the PMU state.
* It needs to be restored when leaving the handler.
*/
pmu_enabled = cpuc->enabled;
/*
- * No known reason to not always do late ACK,
- * but just in case do it opt-in.
+ * In general, the early ACK is only applied for old platforms.
+ * For the big core starts from Haswell, the late ACK should be
+ * applied.
+ * For the small core after Tremont, we have to do the ACK right
+ * before re-enabling counters, which is in the middle of the
+ * NMI handler.
*/
- if (!x86_pmu.late_ack)
+ if (!late_ack && !mid_ack)
apic_write(APIC_LVTPC, APIC_DM_NMI);
intel_bts_disable_local();
cpuc->enabled = 0;
@@ -2958,6 +2962,8 @@ again:
goto again;
done:
+ if (mid_ack)
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
/* Only restore PMU state when it's active. See x86_pmu_disable(). */
cpuc->enabled = pmu_enabled;
if (pmu_enabled)
@@ -2969,7 +2975,7 @@ done:
* have been reset. This avoids spurious NMIs on
* Haswell CPUs.
*/
- if (x86_pmu.late_ack)
+ if (late_ack)
apic_write(APIC_LVTPC, APIC_DM_NMI);
return handled;
}
@@ -5026,9 +5032,9 @@ static ssize_t freeze_on_smi_store(struct device *cdev,
x86_pmu.attr_freeze_on_smi = val;
- get_online_cpus();
+ cpus_read_lock();
on_each_cpu(flip_smm_bit, &val, 1);
- put_online_cpus();
+ cpus_read_unlock();
done:
mutex_unlock(&freeze_on_smi_mutex);
@@ -5071,9 +5077,9 @@ static ssize_t set_sysctl_tfa(struct device *cdev,
allow_tsx_force_abort = val;
- get_online_cpus();
+ cpus_read_lock();
on_each_cpu(update_tfa_sched, NULL, 1);
- put_online_cpus();
+ cpus_read_unlock();
return count;
}
@@ -6129,7 +6135,6 @@ __init int intel_pmu_init(void)
static_branch_enable(&perf_is_hybrid);
x86_pmu.num_hybrid_pmus = X86_HYBRID_NUM_PMUS;
- x86_pmu.late_ack = true;
x86_pmu.pebs_aliases = NULL;
x86_pmu.pebs_prec_dist = true;
x86_pmu.pebs_block = true;
@@ -6167,6 +6172,7 @@ __init int intel_pmu_init(void)
pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX];
pmu->name = "cpu_core";
pmu->cpu_type = hybrid_big;
+ pmu->late_ack = true;
if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) {
pmu->num_counters = x86_pmu.num_counters + 2;
pmu->num_counters_fixed = x86_pmu.num_counters_fixed + 1;
@@ -6192,6 +6198,7 @@ __init int intel_pmu_init(void)
pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX];
pmu->name = "cpu_atom";
pmu->cpu_type = hybrid_small;
+ pmu->mid_ack = true;
pmu->num_counters = x86_pmu.num_counters;
pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
pmu->max_pebs_events = x86_pmu.max_pebs_events;
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 915847655c06..7f406c14715f 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -62,7 +62,7 @@ static struct pt_cap_desc {
PT_CAP(single_range_output, 0, CPUID_ECX, BIT(2)),
PT_CAP(output_subsys, 0, CPUID_ECX, BIT(3)),
PT_CAP(payloads_lip, 0, CPUID_ECX, BIT(31)),
- PT_CAP(num_address_ranges, 1, CPUID_EAX, 0x3),
+ PT_CAP(num_address_ranges, 1, CPUID_EAX, 0x7),
PT_CAP(mtc_periods, 1, CPUID_EAX, 0xffff0000),
PT_CAP(cycle_thresholds, 1, CPUID_EBX, 0xffff),
PT_CAP(psb_periods, 1, CPUID_EBX, 0xffff0000),
@@ -1708,7 +1708,7 @@ static __init int pt_init(void)
if (!boot_cpu_has(X86_FEATURE_INTEL_PT))
return -ENODEV;
- get_online_cpus();
+ cpus_read_lock();
for_each_online_cpu(cpu) {
u64 ctl;
@@ -1716,7 +1716,7 @@ static __init int pt_init(void)
if (!ret && (ctl & RTIT_CTL_TRACEEN))
prior_warn++;
}
- put_online_cpus();
+ cpus_read_unlock();
if (prior_warn) {
x86_add_exclusive(x86_lbr_exclusive_pt);
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 9bf4dbbc26e2..c72e368dd164 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -842,6 +842,18 @@ static const struct attribute_group uncore_pmu_attr_group = {
.attrs = uncore_pmu_attrs,
};
+void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu)
+{
+ struct intel_uncore_type *type = pmu->type;
+
+ if (type->num_boxes == 1)
+ sprintf(pmu_name, "uncore_type_%u", type->type_id);
+ else {
+ sprintf(pmu_name, "uncore_type_%u_%d",
+ type->type_id, type->box_ids[pmu->pmu_idx]);
+ }
+}
+
static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu)
{
struct intel_uncore_type *type = pmu->type;
@@ -851,12 +863,7 @@ static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu)
* Use uncore_type_&typeid_&boxid as name.
*/
if (!type->name) {
- if (type->num_boxes == 1)
- sprintf(pmu->name, "uncore_type_%u", type->type_id);
- else {
- sprintf(pmu->name, "uncore_type_%u_%d",
- type->type_id, type->box_ids[pmu->pmu_idx]);
- }
+ uncore_get_alias_name(pmu->name, pmu);
return;
}
@@ -865,9 +872,13 @@ static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu)
sprintf(pmu->name, "uncore_%s", type->name);
else
sprintf(pmu->name, "uncore");
- } else
- sprintf(pmu->name, "uncore_%s_%d", type->name, pmu->pmu_idx);
-
+ } else {
+ /*
+ * Use the box ID from the discovery table if applicable.
+ */
+ sprintf(pmu->name, "uncore_%s_%d", type->name,
+ type->box_ids ? type->box_ids[pmu->pmu_idx] : pmu->pmu_idx);
+ }
}
static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
@@ -1663,6 +1674,7 @@ struct intel_uncore_init_fun {
void (*cpu_init)(void);
int (*pci_init)(void);
void (*mmio_init)(void);
+ bool use_discovery;
};
static const struct intel_uncore_init_fun nhm_uncore_init __initconst = {
@@ -1765,6 +1777,13 @@ static const struct intel_uncore_init_fun snr_uncore_init __initconst = {
.mmio_init = snr_uncore_mmio_init,
};
+static const struct intel_uncore_init_fun spr_uncore_init __initconst = {
+ .cpu_init = spr_uncore_cpu_init,
+ .pci_init = spr_uncore_pci_init,
+ .mmio_init = spr_uncore_mmio_init,
+ .use_discovery = true,
+};
+
static const struct intel_uncore_init_fun generic_uncore_init __initconst = {
.cpu_init = intel_uncore_generic_uncore_cpu_init,
.pci_init = intel_uncore_generic_uncore_pci_init,
@@ -1809,6 +1828,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &rkl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &adl_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &spr_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init),
{},
};
@@ -1832,8 +1852,13 @@ static int __init intel_uncore_init(void)
uncore_init = (struct intel_uncore_init_fun *)&generic_uncore_init;
else
return -ENODEV;
- } else
+ } else {
uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
+ if (uncore_no_discover && uncore_init->use_discovery)
+ return -ENODEV;
+ if (uncore_init->use_discovery && !intel_uncore_has_discovery_tables())
+ return -ENODEV;
+ }
if (uncore_init->pci_init) {
pret = uncore_init->pci_init();
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 187d7287039c..b9687980aab6 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -561,6 +561,7 @@ struct event_constraint *
uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event);
void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event);
u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx);
+void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu);
extern struct intel_uncore_type *empty_uncore[];
extern struct intel_uncore_type **uncore_msr_uncores;
@@ -608,6 +609,9 @@ void snr_uncore_mmio_init(void);
int icx_uncore_pci_init(void);
void icx_uncore_cpu_init(void);
void icx_uncore_mmio_init(void);
+int spr_uncore_pci_init(void);
+void spr_uncore_cpu_init(void);
+void spr_uncore_mmio_init(void);
/* uncore_nhmex.c */
void nhmex_uncore_cpu_init(void);
diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c
index aba9bff95413..3049c646fa20 100644
--- a/arch/x86/events/intel/uncore_discovery.c
+++ b/arch/x86/events/intel/uncore_discovery.c
@@ -337,17 +337,17 @@ static const struct attribute_group generic_uncore_format_group = {
.attrs = generic_uncore_formats_attr,
};
-static void intel_generic_uncore_msr_init_box(struct intel_uncore_box *box)
+void intel_generic_uncore_msr_init_box(struct intel_uncore_box *box)
{
wrmsrl(uncore_msr_box_ctl(box), GENERIC_PMON_BOX_CTL_INT);
}
-static void intel_generic_uncore_msr_disable_box(struct intel_uncore_box *box)
+void intel_generic_uncore_msr_disable_box(struct intel_uncore_box *box)
{
wrmsrl(uncore_msr_box_ctl(box), GENERIC_PMON_BOX_CTL_FRZ);
}
-static void intel_generic_uncore_msr_enable_box(struct intel_uncore_box *box)
+void intel_generic_uncore_msr_enable_box(struct intel_uncore_box *box)
{
wrmsrl(uncore_msr_box_ctl(box), 0);
}
@@ -377,7 +377,7 @@ static struct intel_uncore_ops generic_uncore_msr_ops = {
.read_counter = uncore_msr_read_counter,
};
-static void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box)
+void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box)
{
struct pci_dev *pdev = box->pci_dev;
int box_ctl = uncore_pci_box_ctl(box);
@@ -386,7 +386,7 @@ static void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box)
pci_write_config_dword(pdev, box_ctl, GENERIC_PMON_BOX_CTL_INT);
}
-static void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box)
+void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box)
{
struct pci_dev *pdev = box->pci_dev;
int box_ctl = uncore_pci_box_ctl(box);
@@ -394,7 +394,7 @@ static void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box)
pci_write_config_dword(pdev, box_ctl, GENERIC_PMON_BOX_CTL_FRZ);
}
-static void intel_generic_uncore_pci_enable_box(struct intel_uncore_box *box)
+void intel_generic_uncore_pci_enable_box(struct intel_uncore_box *box)
{
struct pci_dev *pdev = box->pci_dev;
int box_ctl = uncore_pci_box_ctl(box);
@@ -411,8 +411,8 @@ static void intel_generic_uncore_pci_enable_event(struct intel_uncore_box *box,
pci_write_config_dword(pdev, hwc->config_base, hwc->config);
}
-static void intel_generic_uncore_pci_disable_event(struct intel_uncore_box *box,
- struct perf_event *event)
+void intel_generic_uncore_pci_disable_event(struct intel_uncore_box *box,
+ struct perf_event *event)
{
struct pci_dev *pdev = box->pci_dev;
struct hw_perf_event *hwc = &event->hw;
@@ -420,8 +420,8 @@ static void intel_generic_uncore_pci_disable_event(struct intel_uncore_box *box,
pci_write_config_dword(pdev, hwc->config_base, 0);
}
-static u64 intel_generic_uncore_pci_read_counter(struct intel_uncore_box *box,
- struct perf_event *event)
+u64 intel_generic_uncore_pci_read_counter(struct intel_uncore_box *box,
+ struct perf_event *event)
{
struct pci_dev *pdev = box->pci_dev;
struct hw_perf_event *hwc = &event->hw;
@@ -454,7 +454,7 @@ static unsigned int generic_uncore_mmio_box_ctl(struct intel_uncore_box *box)
return type->box_ctls[box->dieid] + type->mmio_offsets[box->pmu->pmu_idx];
}
-static void intel_generic_uncore_mmio_init_box(struct intel_uncore_box *box)
+void intel_generic_uncore_mmio_init_box(struct intel_uncore_box *box)
{
unsigned int box_ctl = generic_uncore_mmio_box_ctl(box);
struct intel_uncore_type *type = box->pmu->type;
@@ -478,7 +478,7 @@ static void intel_generic_uncore_mmio_init_box(struct intel_uncore_box *box)
writel(GENERIC_PMON_BOX_CTL_INT, box->io_addr);
}
-static void intel_generic_uncore_mmio_disable_box(struct intel_uncore_box *box)
+void intel_generic_uncore_mmio_disable_box(struct intel_uncore_box *box)
{
if (!box->io_addr)
return;
@@ -486,7 +486,7 @@ static void intel_generic_uncore_mmio_disable_box(struct intel_uncore_box *box)
writel(GENERIC_PMON_BOX_CTL_FRZ, box->io_addr);
}
-static void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box)
+void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box)
{
if (!box->io_addr)
return;
@@ -505,8 +505,8 @@ static void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box,
writel(hwc->config, box->io_addr + hwc->config_base);
}
-static void intel_generic_uncore_mmio_disable_event(struct intel_uncore_box *box,
- struct perf_event *event)
+void intel_generic_uncore_mmio_disable_event(struct intel_uncore_box *box,
+ struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
@@ -568,8 +568,8 @@ static bool uncore_update_uncore_type(enum uncore_access_type type_id,
return true;
}
-static struct intel_uncore_type **
-intel_uncore_generic_init_uncores(enum uncore_access_type type_id)
+struct intel_uncore_type **
+intel_uncore_generic_init_uncores(enum uncore_access_type type_id, int num_extra)
{
struct intel_uncore_discovery_type *type;
struct intel_uncore_type **uncores;
@@ -577,7 +577,7 @@ intel_uncore_generic_init_uncores(enum uncore_access_type type_id)
struct rb_node *node;
int i = 0;
- uncores = kcalloc(num_discovered_types[type_id] + 1,
+ uncores = kcalloc(num_discovered_types[type_id] + num_extra + 1,
sizeof(struct intel_uncore_type *), GFP_KERNEL);
if (!uncores)
return empty_uncore;
@@ -606,17 +606,17 @@ intel_uncore_generic_init_uncores(enum uncore_access_type type_id)
void intel_uncore_generic_uncore_cpu_init(void)
{
- uncore_msr_uncores = intel_uncore_generic_init_uncores(UNCORE_ACCESS_MSR);
+ uncore_msr_uncores = intel_uncore_generic_init_uncores(UNCORE_ACCESS_MSR, 0);
}
int intel_uncore_generic_uncore_pci_init(void)
{
- uncore_pci_uncores = intel_uncore_generic_init_uncores(UNCORE_ACCESS_PCI);
+ uncore_pci_uncores = intel_uncore_generic_init_uncores(UNCORE_ACCESS_PCI, 0);
return 0;
}
void intel_uncore_generic_uncore_mmio_init(void)
{
- uncore_mmio_uncores = intel_uncore_generic_init_uncores(UNCORE_ACCESS_MMIO);
+ uncore_mmio_uncores = intel_uncore_generic_init_uncores(UNCORE_ACCESS_MMIO, 0);
}
diff --git a/arch/x86/events/intel/uncore_discovery.h b/arch/x86/events/intel/uncore_discovery.h
index 1d652939a01c..7280c8a3c831 100644
--- a/arch/x86/events/intel/uncore_discovery.h
+++ b/arch/x86/events/intel/uncore_discovery.h
@@ -129,3 +129,24 @@ void intel_uncore_clear_discovery_tables(void);
void intel_uncore_generic_uncore_cpu_init(void);
int intel_uncore_generic_uncore_pci_init(void);
void intel_uncore_generic_uncore_mmio_init(void);
+
+void intel_generic_uncore_msr_init_box(struct intel_uncore_box *box);
+void intel_generic_uncore_msr_disable_box(struct intel_uncore_box *box);
+void intel_generic_uncore_msr_enable_box(struct intel_uncore_box *box);
+
+void intel_generic_uncore_mmio_init_box(struct intel_uncore_box *box);
+void intel_generic_uncore_mmio_disable_box(struct intel_uncore_box *box);
+void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box);
+void intel_generic_uncore_mmio_disable_event(struct intel_uncore_box *box,
+ struct perf_event *event);
+
+void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box);
+void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box);
+void intel_generic_uncore_pci_enable_box(struct intel_uncore_box *box);
+void intel_generic_uncore_pci_disable_event(struct intel_uncore_box *box,
+ struct perf_event *event);
+u64 intel_generic_uncore_pci_read_counter(struct intel_uncore_box *box,
+ struct perf_event *event);
+
+struct intel_uncore_type **
+intel_uncore_generic_init_uncores(enum uncore_access_type type_id, int num_extra);
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 609c24aec71a..5ddc0f30db6f 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* SandyBridge-EP/IvyTown uncore support */
#include "uncore.h"
+#include "uncore_discovery.h"
/* SNB-EP pci bus to socket mapping */
#define SNBEP_CPUNODEID 0x40
@@ -454,6 +455,17 @@
#define ICX_NUMBER_IMC_CHN 2
#define ICX_IMC_MEM_STRIDE 0x4
+/* SPR */
+#define SPR_RAW_EVENT_MASK_EXT 0xffffff
+
+/* SPR CHA */
+#define SPR_CHA_PMON_CTL_TID_EN (1 << 16)
+#define SPR_CHA_PMON_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \
+ SPR_CHA_PMON_CTL_TID_EN)
+#define SPR_CHA_PMON_BOX_FILTER_TID 0x3ff
+
+#define SPR_C0_MSR_PMON_BOX_FILTER0 0x200e
+
DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
DEFINE_UNCORE_FORMAT_ATTR(event2, event, "config:0-6");
DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21");
@@ -466,6 +478,7 @@ DEFINE_UNCORE_FORMAT_ATTR(umask_ext4, umask, "config:8-15,32-55");
DEFINE_UNCORE_FORMAT_ATTR(qor, qor, "config:16");
DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19");
+DEFINE_UNCORE_FORMAT_ATTR(tid_en2, tid_en, "config:16");
DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
DEFINE_UNCORE_FORMAT_ATTR(thresh9, thresh, "config:24-35");
DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31");
@@ -3838,26 +3851,32 @@ clear_attr_update:
return ret;
}
-static int skx_iio_set_mapping(struct intel_uncore_type *type)
-{
- return pmu_iio_set_mapping(type, &skx_iio_mapping_group);
-}
-
-static void skx_iio_cleanup_mapping(struct intel_uncore_type *type)
+static void
+pmu_iio_cleanup_mapping(struct intel_uncore_type *type, struct attribute_group *ag)
{
- struct attribute **attr = skx_iio_mapping_group.attrs;
+ struct attribute **attr = ag->attrs;
if (!attr)
return;
for (; *attr; attr++)
kfree((*attr)->name);
- kfree(attr_to_ext_attr(*skx_iio_mapping_group.attrs));
- kfree(skx_iio_mapping_group.attrs);
- skx_iio_mapping_group.attrs = NULL;
+ kfree(attr_to_ext_attr(*ag->attrs));
+ kfree(ag->attrs);
+ ag->attrs = NULL;
kfree(type->topology);
}
+static int skx_iio_set_mapping(struct intel_uncore_type *type)
+{
+ return pmu_iio_set_mapping(type, &skx_iio_mapping_group);
+}
+
+static void skx_iio_cleanup_mapping(struct intel_uncore_type *type)
+{
+ pmu_iio_cleanup_mapping(type, &skx_iio_mapping_group);
+}
+
static struct intel_uncore_type skx_uncore_iio = {
.name = "iio",
.num_counters = 4,
@@ -4501,6 +4520,11 @@ static int snr_iio_set_mapping(struct intel_uncore_type *type)
return pmu_iio_set_mapping(type, &snr_iio_mapping_group);
}
+static void snr_iio_cleanup_mapping(struct intel_uncore_type *type)
+{
+ pmu_iio_cleanup_mapping(type, &snr_iio_mapping_group);
+}
+
static struct intel_uncore_type snr_uncore_iio = {
.name = "iio",
.num_counters = 4,
@@ -4517,7 +4541,7 @@ static struct intel_uncore_type snr_uncore_iio = {
.attr_update = snr_iio_attr_update,
.get_topology = snr_iio_get_topology,
.set_mapping = snr_iio_set_mapping,
- .cleanup_mapping = skx_iio_cleanup_mapping,
+ .cleanup_mapping = snr_iio_cleanup_mapping,
};
static struct intel_uncore_type snr_uncore_irp = {
@@ -4783,13 +4807,15 @@ int snr_uncore_pci_init(void)
return 0;
}
-static struct pci_dev *snr_uncore_get_mc_dev(int id)
+#define SNR_MC_DEVICE_ID 0x3451
+
+static struct pci_dev *snr_uncore_get_mc_dev(unsigned int device, int id)
{
struct pci_dev *mc_dev = NULL;
int pkg;
while (1) {
- mc_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3451, mc_dev);
+ mc_dev = pci_get_device(PCI_VENDOR_ID_INTEL, device, mc_dev);
if (!mc_dev)
break;
pkg = uncore_pcibus_to_dieid(mc_dev->bus);
@@ -4799,19 +4825,20 @@ static struct pci_dev *snr_uncore_get_mc_dev(int id)
return mc_dev;
}
-static void __snr_uncore_mmio_init_box(struct intel_uncore_box *box,
- unsigned int box_ctl, int mem_offset)
+static int snr_uncore_mmio_map(struct intel_uncore_box *box,
+ unsigned int box_ctl, int mem_offset,
+ unsigned int device)
{
- struct pci_dev *pdev = snr_uncore_get_mc_dev(box->dieid);
+ struct pci_dev *pdev = snr_uncore_get_mc_dev(device, box->dieid);
struct intel_uncore_type *type = box->pmu->type;
resource_size_t addr;
u32 pci_dword;
if (!pdev)
- return;
+ return -ENODEV;
pci_read_config_dword(pdev, SNR_IMC_MMIO_BASE_OFFSET, &pci_dword);
- addr = (pci_dword & SNR_IMC_MMIO_BASE_MASK) << 23;
+ addr = ((resource_size_t)pci_dword & SNR_IMC_MMIO_BASE_MASK) << 23;
pci_read_config_dword(pdev, mem_offset, &pci_dword);
addr |= (pci_dword & SNR_IMC_MMIO_MEM0_MASK) << 12;
@@ -4821,16 +4848,25 @@ static void __snr_uncore_mmio_init_box(struct intel_uncore_box *box,
box->io_addr = ioremap(addr, type->mmio_map_size);
if (!box->io_addr) {
pr_warn("perf uncore: Failed to ioremap for %s.\n", type->name);
- return;
+ return -EINVAL;
}
- writel(IVBEP_PMON_BOX_CTL_INT, box->io_addr);
+ return 0;
+}
+
+static void __snr_uncore_mmio_init_box(struct intel_uncore_box *box,
+ unsigned int box_ctl, int mem_offset,
+ unsigned int device)
+{
+ if (!snr_uncore_mmio_map(box, box_ctl, mem_offset, device))
+ writel(IVBEP_PMON_BOX_CTL_INT, box->io_addr);
}
static void snr_uncore_mmio_init_box(struct intel_uncore_box *box)
{
__snr_uncore_mmio_init_box(box, uncore_mmio_box_ctl(box),
- SNR_IMC_MMIO_MEM0_OFFSET);
+ SNR_IMC_MMIO_MEM0_OFFSET,
+ SNR_MC_DEVICE_ID);
}
static void snr_uncore_mmio_disable_box(struct intel_uncore_box *box)
@@ -5092,6 +5128,11 @@ static int icx_iio_set_mapping(struct intel_uncore_type *type)
return pmu_iio_set_mapping(type, &icx_iio_mapping_group);
}
+static void icx_iio_cleanup_mapping(struct intel_uncore_type *type)
+{
+ pmu_iio_cleanup_mapping(type, &icx_iio_mapping_group);
+}
+
static struct intel_uncore_type icx_uncore_iio = {
.name = "iio",
.num_counters = 4,
@@ -5109,7 +5150,7 @@ static struct intel_uncore_type icx_uncore_iio = {
.attr_update = icx_iio_attr_update,
.get_topology = icx_iio_get_topology,
.set_mapping = icx_iio_set_mapping,
- .cleanup_mapping = skx_iio_cleanup_mapping,
+ .cleanup_mapping = icx_iio_cleanup_mapping,
};
static struct intel_uncore_type icx_uncore_irp = {
@@ -5405,7 +5446,8 @@ static void icx_uncore_imc_init_box(struct intel_uncore_box *box)
int mem_offset = (box->pmu->pmu_idx / ICX_NUMBER_IMC_CHN) * ICX_IMC_MEM_STRIDE +
SNR_IMC_MMIO_MEM0_OFFSET;
- __snr_uncore_mmio_init_box(box, box_ctl, mem_offset);
+ __snr_uncore_mmio_init_box(box, box_ctl, mem_offset,
+ SNR_MC_DEVICE_ID);
}
static struct intel_uncore_ops icx_uncore_mmio_ops = {
@@ -5475,7 +5517,8 @@ static void icx_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
int mem_offset = box->pmu->pmu_idx * ICX_IMC_MEM_STRIDE +
SNR_IMC_MMIO_MEM0_OFFSET;
- __snr_uncore_mmio_init_box(box, uncore_mmio_box_ctl(box), mem_offset);
+ snr_uncore_mmio_map(box, uncore_mmio_box_ctl(box),
+ mem_offset, SNR_MC_DEVICE_ID);
}
static struct intel_uncore_ops icx_uncore_imc_freerunning_ops = {
@@ -5509,3 +5552,497 @@ void icx_uncore_mmio_init(void)
}
/* end of ICX uncore support */
+
+/* SPR uncore support */
+
+static void spr_uncore_msr_enable_event(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+
+ if (reg1->idx != EXTRA_REG_NONE)
+ wrmsrl(reg1->reg, reg1->config);
+
+ wrmsrl(hwc->config_base, hwc->config);
+}
+
+static void spr_uncore_msr_disable_event(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+
+ if (reg1->idx != EXTRA_REG_NONE)
+ wrmsrl(reg1->reg, 0);
+
+ wrmsrl(hwc->config_base, 0);
+}
+
+static int spr_cha_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ bool tie_en = !!(event->hw.config & SPR_CHA_PMON_CTL_TID_EN);
+ struct intel_uncore_type *type = box->pmu->type;
+
+ if (tie_en) {
+ reg1->reg = SPR_C0_MSR_PMON_BOX_FILTER0 +
+ HSWEP_CBO_MSR_OFFSET * type->box_ids[box->pmu->pmu_idx];
+ reg1->config = event->attr.config1 & SPR_CHA_PMON_BOX_FILTER_TID;
+ reg1->idx = 0;
+ }
+
+ return 0;
+}
+
+static struct intel_uncore_ops spr_uncore_chabox_ops = {
+ .init_box = intel_generic_uncore_msr_init_box,
+ .disable_box = intel_generic_uncore_msr_disable_box,
+ .enable_box = intel_generic_uncore_msr_enable_box,
+ .disable_event = spr_uncore_msr_disable_event,
+ .enable_event = spr_uncore_msr_enable_event,
+ .read_counter = uncore_msr_read_counter,
+ .hw_config = spr_cha_hw_config,
+ .get_constraint = uncore_get_constraint,
+ .put_constraint = uncore_put_constraint,
+};
+
+static struct attribute *spr_uncore_cha_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask_ext4.attr,
+ &format_attr_tid_en2.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ &format_attr_filter_tid5.attr,
+ NULL,
+};
+static const struct attribute_group spr_uncore_chabox_format_group = {
+ .name = "format",
+ .attrs = spr_uncore_cha_formats_attr,
+};
+
+static ssize_t alias_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct intel_uncore_pmu *pmu = dev_to_uncore_pmu(dev);
+ char pmu_name[UNCORE_PMU_NAME_LEN];
+
+ uncore_get_alias_name(pmu_name, pmu);
+ return sysfs_emit(buf, "%s\n", pmu_name);
+}
+
+static DEVICE_ATTR_RO(alias);
+
+static struct attribute *uncore_alias_attrs[] = {
+ &dev_attr_alias.attr,
+ NULL
+};
+
+ATTRIBUTE_GROUPS(uncore_alias);
+
+static struct intel_uncore_type spr_uncore_chabox = {
+ .name = "cha",
+ .event_mask = SPR_CHA_PMON_EVENT_MASK,
+ .event_mask_ext = SPR_RAW_EVENT_MASK_EXT,
+ .num_shared_regs = 1,
+ .ops = &spr_uncore_chabox_ops,
+ .format_group = &spr_uncore_chabox_format_group,
+ .attr_update = uncore_alias_groups,
+};
+
+static struct intel_uncore_type spr_uncore_iio = {
+ .name = "iio",
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .event_mask_ext = SNR_IIO_PMON_RAW_EVENT_MASK_EXT,
+ .format_group = &snr_uncore_iio_format_group,
+ .attr_update = uncore_alias_groups,
+};
+
+static struct attribute *spr_uncore_raw_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask_ext4.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ NULL,
+};
+
+static const struct attribute_group spr_uncore_raw_format_group = {
+ .name = "format",
+ .attrs = spr_uncore_raw_formats_attr,
+};
+
+#define SPR_UNCORE_COMMON_FORMAT() \
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK, \
+ .event_mask_ext = SPR_RAW_EVENT_MASK_EXT, \
+ .format_group = &spr_uncore_raw_format_group, \
+ .attr_update = uncore_alias_groups
+
+static struct intel_uncore_type spr_uncore_irp = {
+ SPR_UNCORE_COMMON_FORMAT(),
+ .name = "irp",
+
+};
+
+static struct intel_uncore_type spr_uncore_m2pcie = {
+ SPR_UNCORE_COMMON_FORMAT(),
+ .name = "m2pcie",
+};
+
+static struct intel_uncore_type spr_uncore_pcu = {
+ .name = "pcu",
+ .attr_update = uncore_alias_groups,
+};
+
+static void spr_uncore_mmio_enable_event(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (!box->io_addr)
+ return;
+
+ if (uncore_pmc_fixed(hwc->idx))
+ writel(SNBEP_PMON_CTL_EN, box->io_addr + hwc->config_base);
+ else
+ writel(hwc->config, box->io_addr + hwc->config_base);
+}
+
+static struct intel_uncore_ops spr_uncore_mmio_ops = {
+ .init_box = intel_generic_uncore_mmio_init_box,
+ .exit_box = uncore_mmio_exit_box,
+ .disable_box = intel_generic_uncore_mmio_disable_box,
+ .enable_box = intel_generic_uncore_mmio_enable_box,
+ .disable_event = intel_generic_uncore_mmio_disable_event,
+ .enable_event = spr_uncore_mmio_enable_event,
+ .read_counter = uncore_mmio_read_counter,
+};
+
+static struct intel_uncore_type spr_uncore_imc = {
+ SPR_UNCORE_COMMON_FORMAT(),
+ .name = "imc",
+ .fixed_ctr_bits = 48,
+ .fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR,
+ .fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL,
+ .ops = &spr_uncore_mmio_ops,
+};
+
+static void spr_uncore_pci_enable_event(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ struct hw_perf_event *hwc = &event->hw;
+
+ pci_write_config_dword(pdev, hwc->config_base + 4, (u32)(hwc->config >> 32));
+ pci_write_config_dword(pdev, hwc->config_base, (u32)hwc->config);
+}
+
+static struct intel_uncore_ops spr_uncore_pci_ops = {
+ .init_box = intel_generic_uncore_pci_init_box,
+ .disable_box = intel_generic_uncore_pci_disable_box,
+ .enable_box = intel_generic_uncore_pci_enable_box,
+ .disable_event = intel_generic_uncore_pci_disable_event,
+ .enable_event = spr_uncore_pci_enable_event,
+ .read_counter = intel_generic_uncore_pci_read_counter,
+};
+
+#define SPR_UNCORE_PCI_COMMON_FORMAT() \
+ SPR_UNCORE_COMMON_FORMAT(), \
+ .ops = &spr_uncore_pci_ops
+
+static struct intel_uncore_type spr_uncore_m2m = {
+ SPR_UNCORE_PCI_COMMON_FORMAT(),
+ .name = "m2m",
+};
+
+static struct intel_uncore_type spr_uncore_upi = {
+ SPR_UNCORE_PCI_COMMON_FORMAT(),
+ .name = "upi",
+};
+
+static struct intel_uncore_type spr_uncore_m3upi = {
+ SPR_UNCORE_PCI_COMMON_FORMAT(),
+ .name = "m3upi",
+};
+
+static struct intel_uncore_type spr_uncore_mdf = {
+ SPR_UNCORE_COMMON_FORMAT(),
+ .name = "mdf",
+};
+
+#define UNCORE_SPR_NUM_UNCORE_TYPES 12
+#define UNCORE_SPR_IIO 1
+#define UNCORE_SPR_IMC 6
+
+static struct intel_uncore_type *spr_uncores[UNCORE_SPR_NUM_UNCORE_TYPES] = {
+ &spr_uncore_chabox,
+ &spr_uncore_iio,
+ &spr_uncore_irp,
+ &spr_uncore_m2pcie,
+ &spr_uncore_pcu,
+ NULL,
+ &spr_uncore_imc,
+ &spr_uncore_m2m,
+ &spr_uncore_upi,
+ &spr_uncore_m3upi,
+ NULL,
+ &spr_uncore_mdf,
+};
+
+enum perf_uncore_spr_iio_freerunning_type_id {
+ SPR_IIO_MSR_IOCLK,
+ SPR_IIO_MSR_BW_IN,
+ SPR_IIO_MSR_BW_OUT,
+
+ SPR_IIO_FREERUNNING_TYPE_MAX,
+};
+
+static struct freerunning_counters spr_iio_freerunning[] = {
+ [SPR_IIO_MSR_IOCLK] = { 0x340e, 0x1, 0x10, 1, 48 },
+ [SPR_IIO_MSR_BW_IN] = { 0x3800, 0x1, 0x10, 8, 48 },
+ [SPR_IIO_MSR_BW_OUT] = { 0x3808, 0x1, 0x10, 8, 48 },
+};
+
+static struct uncore_event_desc spr_uncore_iio_freerunning_events[] = {
+ /* Free-Running IIO CLOCKS Counter */
+ INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"),
+ /* Free-Running IIO BANDWIDTH IN Counters */
+ INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"),
+ /* Free-Running IIO BANDWIDTH OUT Counters */
+ INTEL_UNCORE_EVENT_DESC(bw_out_port0, "event=0xff,umask=0x30"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port0.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port0.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port1, "event=0xff,umask=0x31"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port1.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port1.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port2, "event=0xff,umask=0x32"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port2.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port2.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port3, "event=0xff,umask=0x33"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port3.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port3.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port4, "event=0xff,umask=0x34"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port4.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port4.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port5, "event=0xff,umask=0x35"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port5.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port5.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port6, "event=0xff,umask=0x36"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port6.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port6.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port7, "event=0xff,umask=0x37"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port7.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port7.unit, "MiB"),
+ { /* end: all zeroes */ },
+};
+
+static struct intel_uncore_type spr_uncore_iio_free_running = {
+ .name = "iio_free_running",
+ .num_counters = 17,
+ .num_freerunning_types = SPR_IIO_FREERUNNING_TYPE_MAX,
+ .freerunning = spr_iio_freerunning,
+ .ops = &skx_uncore_iio_freerunning_ops,
+ .event_descs = spr_uncore_iio_freerunning_events,
+ .format_group = &skx_uncore_iio_freerunning_format_group,
+};
+
+enum perf_uncore_spr_imc_freerunning_type_id {
+ SPR_IMC_DCLK,
+ SPR_IMC_PQ_CYCLES,
+
+ SPR_IMC_FREERUNNING_TYPE_MAX,
+};
+
+static struct freerunning_counters spr_imc_freerunning[] = {
+ [SPR_IMC_DCLK] = { 0x22b0, 0x0, 0, 1, 48 },
+ [SPR_IMC_PQ_CYCLES] = { 0x2318, 0x8, 0, 2, 48 },
+};
+
+static struct uncore_event_desc spr_uncore_imc_freerunning_events[] = {
+ INTEL_UNCORE_EVENT_DESC(dclk, "event=0xff,umask=0x10"),
+
+ INTEL_UNCORE_EVENT_DESC(rpq_cycles, "event=0xff,umask=0x20"),
+ INTEL_UNCORE_EVENT_DESC(wpq_cycles, "event=0xff,umask=0x21"),
+ { /* end: all zeroes */ },
+};
+
+#define SPR_MC_DEVICE_ID 0x3251
+
+static void spr_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
+{
+ int mem_offset = box->pmu->pmu_idx * ICX_IMC_MEM_STRIDE + SNR_IMC_MMIO_MEM0_OFFSET;
+
+ snr_uncore_mmio_map(box, uncore_mmio_box_ctl(box),
+ mem_offset, SPR_MC_DEVICE_ID);
+}
+
+static struct intel_uncore_ops spr_uncore_imc_freerunning_ops = {
+ .init_box = spr_uncore_imc_freerunning_init_box,
+ .exit_box = uncore_mmio_exit_box,
+ .read_counter = uncore_mmio_read_counter,
+ .hw_config = uncore_freerunning_hw_config,
+};
+
+static struct intel_uncore_type spr_uncore_imc_free_running = {
+ .name = "imc_free_running",
+ .num_counters = 3,
+ .mmio_map_size = SNR_IMC_MMIO_SIZE,
+ .num_freerunning_types = SPR_IMC_FREERUNNING_TYPE_MAX,
+ .freerunning = spr_imc_freerunning,
+ .ops = &spr_uncore_imc_freerunning_ops,
+ .event_descs = spr_uncore_imc_freerunning_events,
+ .format_group = &skx_uncore_iio_freerunning_format_group,
+};
+
+#define UNCORE_SPR_MSR_EXTRA_UNCORES 1
+#define UNCORE_SPR_MMIO_EXTRA_UNCORES 1
+
+static struct intel_uncore_type *spr_msr_uncores[UNCORE_SPR_MSR_EXTRA_UNCORES] = {
+ &spr_uncore_iio_free_running,
+};
+
+static struct intel_uncore_type *spr_mmio_uncores[UNCORE_SPR_MMIO_EXTRA_UNCORES] = {
+ &spr_uncore_imc_free_running,
+};
+
+static void uncore_type_customized_copy(struct intel_uncore_type *to_type,
+ struct intel_uncore_type *from_type)
+{
+ if (!to_type || !from_type)
+ return;
+
+ if (from_type->name)
+ to_type->name = from_type->name;
+ if (from_type->fixed_ctr_bits)
+ to_type->fixed_ctr_bits = from_type->fixed_ctr_bits;
+ if (from_type->event_mask)
+ to_type->event_mask = from_type->event_mask;
+ if (from_type->event_mask_ext)
+ to_type->event_mask_ext = from_type->event_mask_ext;
+ if (from_type->fixed_ctr)
+ to_type->fixed_ctr = from_type->fixed_ctr;
+ if (from_type->fixed_ctl)
+ to_type->fixed_ctl = from_type->fixed_ctl;
+ if (from_type->fixed_ctr_bits)
+ to_type->fixed_ctr_bits = from_type->fixed_ctr_bits;
+ if (from_type->num_shared_regs)
+ to_type->num_shared_regs = from_type->num_shared_regs;
+ if (from_type->constraints)
+ to_type->constraints = from_type->constraints;
+ if (from_type->ops)
+ to_type->ops = from_type->ops;
+ if (from_type->event_descs)
+ to_type->event_descs = from_type->event_descs;
+ if (from_type->format_group)
+ to_type->format_group = from_type->format_group;
+ if (from_type->attr_update)
+ to_type->attr_update = from_type->attr_update;
+}
+
+static struct intel_uncore_type **
+uncore_get_uncores(enum uncore_access_type type_id, int num_extra,
+ struct intel_uncore_type **extra)
+{
+ struct intel_uncore_type **types, **start_types;
+ int i;
+
+ start_types = types = intel_uncore_generic_init_uncores(type_id, num_extra);
+
+ /* Only copy the customized features */
+ for (; *types; types++) {
+ if ((*types)->type_id >= UNCORE_SPR_NUM_UNCORE_TYPES)
+ continue;
+ uncore_type_customized_copy(*types, spr_uncores[(*types)->type_id]);
+ }
+
+ for (i = 0; i < num_extra; i++, types++)
+ *types = extra[i];
+
+ return start_types;
+}
+
+static struct intel_uncore_type *
+uncore_find_type_by_id(struct intel_uncore_type **types, int type_id)
+{
+ for (; *types; types++) {
+ if (type_id == (*types)->type_id)
+ return *types;
+ }
+
+ return NULL;
+}
+
+static int uncore_type_max_boxes(struct intel_uncore_type **types,
+ int type_id)
+{
+ struct intel_uncore_type *type;
+ int i, max = 0;
+
+ type = uncore_find_type_by_id(types, type_id);
+ if (!type)
+ return 0;
+
+ for (i = 0; i < type->num_boxes; i++) {
+ if (type->box_ids[i] > max)
+ max = type->box_ids[i];
+ }
+
+ return max + 1;
+}
+
+void spr_uncore_cpu_init(void)
+{
+ uncore_msr_uncores = uncore_get_uncores(UNCORE_ACCESS_MSR,
+ UNCORE_SPR_MSR_EXTRA_UNCORES,
+ spr_msr_uncores);
+
+ spr_uncore_iio_free_running.num_boxes = uncore_type_max_boxes(uncore_msr_uncores, UNCORE_SPR_IIO);
+}
+
+int spr_uncore_pci_init(void)
+{
+ uncore_pci_uncores = uncore_get_uncores(UNCORE_ACCESS_PCI, 0, NULL);
+ return 0;
+}
+
+void spr_uncore_mmio_init(void)
+{
+ int ret = snbep_pci2phy_map_init(0x3250, SKX_CPUNODEID, SKX_GIDNIDMAP, true);
+
+ if (ret)
+ uncore_mmio_uncores = uncore_get_uncores(UNCORE_ACCESS_MMIO, 0, NULL);
+ else {
+ uncore_mmio_uncores = uncore_get_uncores(UNCORE_ACCESS_MMIO,
+ UNCORE_SPR_MMIO_EXTRA_UNCORES,
+ spr_mmio_uncores);
+
+ spr_uncore_imc_free_running.num_boxes = uncore_type_max_boxes(uncore_mmio_uncores, UNCORE_SPR_IMC) / 2;
+ }
+}
+
+/* end of SPR uncore support */
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 2bf1c7ea2758..e3ac05c97b5e 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -656,6 +656,10 @@ struct x86_hybrid_pmu {
struct event_constraint *event_constraints;
struct event_constraint *pebs_constraints;
struct extra_reg *extra_regs;
+
+ unsigned int late_ack :1,
+ mid_ack :1,
+ enabled_ack :1;
};
static __always_inline struct x86_hybrid_pmu *hybrid_pmu(struct pmu *pmu)
@@ -686,6 +690,16 @@ extern struct static_key_false perf_is_hybrid;
__Fp; \
}))
+#define hybrid_bit(_pmu, _field) \
+({ \
+ bool __Fp = x86_pmu._field; \
+ \
+ if (is_hybrid() && (_pmu)) \
+ __Fp = hybrid_pmu(_pmu)->_field; \
+ \
+ __Fp; \
+})
+
enum hybrid_pmu_type {
hybrid_big = 0x40,
hybrid_small = 0x20,
@@ -755,6 +769,7 @@ struct x86_pmu {
/* PMI handler bits */
unsigned int late_ack :1,
+ mid_ack :1,
enabled_ack :1;
/*
* sysfs attrs
@@ -1115,9 +1130,10 @@ void x86_pmu_stop(struct perf_event *event, int flags);
static inline void x86_pmu_disable_event(struct perf_event *event)
{
+ u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask);
struct hw_perf_event *hwc = &event->hw;
- wrmsrl(hwc->config_base, hwc->config);
+ wrmsrl(hwc->config_base, hwc->config & ~disable_mask);
if (is_counter_pair(hwc))
wrmsrl(x86_pmu_config_addr(hwc->idx + 1), 0);
diff --git a/arch/x86/include/asm/amd-ibs.h b/arch/x86/include/asm/amd-ibs.h
new file mode 100644
index 000000000000..46e1df45efc0
--- /dev/null
+++ b/arch/x86/include/asm/amd-ibs.h
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * From PPR Vol 1 for AMD Family 19h Model 01h B1
+ * 55898 Rev 0.35 - Feb 5, 2021
+ */
+
+#include <asm/msr-index.h>
+
+/*
+ * IBS Hardware MSRs
+ */
+
+/* MSR 0xc0011030: IBS Fetch Control */
+union ibs_fetch_ctl {
+ __u64 val;
+ struct {
+ __u64 fetch_maxcnt:16,/* 0-15: instruction fetch max. count */
+ fetch_cnt:16, /* 16-31: instruction fetch count */
+ fetch_lat:16, /* 32-47: instruction fetch latency */
+ fetch_en:1, /* 48: instruction fetch enable */
+ fetch_val:1, /* 49: instruction fetch valid */
+ fetch_comp:1, /* 50: instruction fetch complete */
+ ic_miss:1, /* 51: i-cache miss */
+ phy_addr_valid:1,/* 52: physical address valid */
+ l1tlb_pgsz:2, /* 53-54: i-cache L1TLB page size
+ * (needs IbsPhyAddrValid) */
+ l1tlb_miss:1, /* 55: i-cache fetch missed in L1TLB */
+ l2tlb_miss:1, /* 56: i-cache fetch missed in L2TLB */
+ rand_en:1, /* 57: random tagging enable */
+ fetch_l2_miss:1,/* 58: L2 miss for sampled fetch
+ * (needs IbsFetchComp) */
+ reserved:5; /* 59-63: reserved */
+ };
+};
+
+/* MSR 0xc0011033: IBS Execution Control */
+union ibs_op_ctl {
+ __u64 val;
+ struct {
+ __u64 opmaxcnt:16, /* 0-15: periodic op max. count */
+ reserved0:1, /* 16: reserved */
+ op_en:1, /* 17: op sampling enable */
+ op_val:1, /* 18: op sample valid */
+ cnt_ctl:1, /* 19: periodic op counter control */
+ opmaxcnt_ext:7, /* 20-26: upper 7 bits of periodic op maximum count */
+ reserved1:5, /* 27-31: reserved */
+ opcurcnt:27, /* 32-58: periodic op counter current count */
+ reserved2:5; /* 59-63: reserved */
+ };
+};
+
+/* MSR 0xc0011035: IBS Op Data 2 */
+union ibs_op_data {
+ __u64 val;
+ struct {
+ __u64 comp_to_ret_ctr:16, /* 0-15: op completion to retire count */
+ tag_to_ret_ctr:16, /* 15-31: op tag to retire count */
+ reserved1:2, /* 32-33: reserved */
+ op_return:1, /* 34: return op */
+ op_brn_taken:1, /* 35: taken branch op */
+ op_brn_misp:1, /* 36: mispredicted branch op */
+ op_brn_ret:1, /* 37: branch op retired */
+ op_rip_invalid:1, /* 38: RIP is invalid */
+ op_brn_fuse:1, /* 39: fused branch op */
+ op_microcode:1, /* 40: microcode op */
+ reserved2:23; /* 41-63: reserved */
+ };
+};
+
+/* MSR 0xc0011036: IBS Op Data 2 */
+union ibs_op_data2 {
+ __u64 val;
+ struct {
+ __u64 data_src:3, /* 0-2: data source */
+ reserved0:1, /* 3: reserved */
+ rmt_node:1, /* 4: destination node */
+ cache_hit_st:1, /* 5: cache hit state */
+ reserved1:57; /* 5-63: reserved */
+ };
+};
+
+/* MSR 0xc0011037: IBS Op Data 3 */
+union ibs_op_data3 {
+ __u64 val;
+ struct {
+ __u64 ld_op:1, /* 0: load op */
+ st_op:1, /* 1: store op */
+ dc_l1tlb_miss:1, /* 2: data cache L1TLB miss */
+ dc_l2tlb_miss:1, /* 3: data cache L2TLB hit in 2M page */
+ dc_l1tlb_hit_2m:1, /* 4: data cache L1TLB hit in 2M page */
+ dc_l1tlb_hit_1g:1, /* 5: data cache L1TLB hit in 1G page */
+ dc_l2tlb_hit_2m:1, /* 6: data cache L2TLB hit in 2M page */
+ dc_miss:1, /* 7: data cache miss */
+ dc_mis_acc:1, /* 8: misaligned access */
+ reserved:4, /* 9-12: reserved */
+ dc_wc_mem_acc:1, /* 13: write combining memory access */
+ dc_uc_mem_acc:1, /* 14: uncacheable memory access */
+ dc_locked_op:1, /* 15: locked operation */
+ dc_miss_no_mab_alloc:1, /* 16: DC miss with no MAB allocated */
+ dc_lin_addr_valid:1, /* 17: data cache linear address valid */
+ dc_phy_addr_valid:1, /* 18: data cache physical address valid */
+ dc_l2_tlb_hit_1g:1, /* 19: data cache L2 hit in 1GB page */
+ l2_miss:1, /* 20: L2 cache miss */
+ sw_pf:1, /* 21: software prefetch */
+ op_mem_width:4, /* 22-25: load/store size in bytes */
+ op_dc_miss_open_mem_reqs:6, /* 26-31: outstanding mem reqs on DC fill */
+ dc_miss_lat:16, /* 32-47: data cache miss latency */
+ tlb_refill_lat:16; /* 48-63: L1 TLB refill latency */
+ };
+};
+
+/* MSR 0xc001103c: IBS Fetch Control Extended */
+union ic_ibs_extd_ctl {
+ __u64 val;
+ struct {
+ __u64 itlb_refill_lat:16, /* 0-15: ITLB Refill latency for sampled fetch */
+ reserved:48; /* 16-63: reserved */
+ };
+};
+
+/*
+ * IBS driver related
+ */
+
+struct perf_ibs_data {
+ u32 size;
+ union {
+ u32 data[0]; /* data buffer starts here */
+ u32 caps;
+ };
+ u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX];
+};
diff --git a/arch/x86/include/asm/kfence.h b/arch/x86/include/asm/kfence.h
index 05b48b33baf0..ff5c7134a37a 100644
--- a/arch/x86/include/asm/kfence.h
+++ b/arch/x86/include/asm/kfence.h
@@ -8,6 +8,8 @@
#ifndef _ASM_X86_KFENCE_H
#define _ASM_X86_KFENCE_H
+#ifndef MODULE
+
#include <linux/bug.h>
#include <linux/kfence.h>
@@ -66,4 +68,6 @@ static inline bool kfence_protect_page(unsigned long addr, bool protect)
return true;
}
+#endif /* !MODULE */
+
#endif /* _ASM_X86_KFENCE_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 974cbfb1eefe..af6ce8d4c86a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1038,6 +1038,13 @@ struct kvm_arch {
struct list_head lpage_disallowed_mmu_pages;
struct kvm_page_track_notifier_node mmu_sp_tracker;
struct kvm_page_track_notifier_head track_notifier_head;
+ /*
+ * Protects marking pages unsync during page faults, as TDP MMU page
+ * faults only take mmu_lock for read. For simplicity, the unsync
+ * pages lock is always taken when marking pages unsync regardless of
+ * whether mmu_lock is held for read or write.
+ */
+ spinlock_t mmu_unsync_pages_lock;
struct list_head assigned_dev_head;
struct iommu_domain *iommu_domain;
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 0607ec4f5091..da9321548f6f 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -265,6 +265,7 @@ enum mcp_flags {
MCP_TIMESTAMP = BIT(0), /* log time stamp */
MCP_UC = BIT(1), /* log uncorrected errors */
MCP_DONTLOG = BIT(2), /* only clear, don't log */
+ MCP_QUEUE_LOG = BIT(3), /* only queue to genpool */
};
bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 1e0d13c9fda6..9ad2acaaae9b 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -797,6 +797,8 @@ extern int set_tsc_mode(unsigned int val);
DECLARE_PER_CPU(u64, msr_misc_features_shadow);
+extern u16 get_llc_id(unsigned int cpu);
+
#ifdef CONFIG_CPU_SUP_AMD
extern u32 amd_get_nodes_per_socket(void);
extern u32 amd_get_highest_perf(void);
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index e322676039f4..b00dbc5fac2b 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -184,6 +184,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define V_IGN_TPR_SHIFT 20
#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT)
+#define V_IRQ_INJECTION_BITS_MASK (V_IRQ_MASK | V_INTR_PRIO_MASK | V_IGN_TPR_MASK)
+
#define V_INTR_MASKING_SHIFT 24
#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT)
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index d5c691a3208b..39224e035e47 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1986,7 +1986,8 @@ static struct irq_chip ioapic_chip __read_mostly = {
.irq_set_affinity = ioapic_set_affinity,
.irq_retrigger = irq_chip_retrigger_hierarchy,
.irq_get_irqchip_state = ioapic_irq_get_chip_state,
- .flags = IRQCHIP_SKIP_SET_WAKE,
+ .flags = IRQCHIP_SKIP_SET_WAKE |
+ IRQCHIP_AFFINITY_PRE_STARTUP,
};
static struct irq_chip ioapic_ir_chip __read_mostly = {
@@ -1999,7 +2000,8 @@ static struct irq_chip ioapic_ir_chip __read_mostly = {
.irq_set_affinity = ioapic_set_affinity,
.irq_retrigger = irq_chip_retrigger_hierarchy,
.irq_get_irqchip_state = ioapic_irq_get_chip_state,
- .flags = IRQCHIP_SKIP_SET_WAKE,
+ .flags = IRQCHIP_SKIP_SET_WAKE |
+ IRQCHIP_AFFINITY_PRE_STARTUP,
};
static inline void init_IO_APIC_traps(void)
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index 44ebe25e7703..dbacb9ec8843 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -58,11 +58,13 @@ msi_set_affinity(struct irq_data *irqd, const struct cpumask *mask, bool force)
* The quirk bit is not set in this case.
* - The new vector is the same as the old vector
* - The old vector is MANAGED_IRQ_SHUTDOWN_VECTOR (interrupt starts up)
+ * - The interrupt is not yet started up
* - The new destination CPU is the same as the old destination CPU
*/
if (!irqd_msi_nomask_quirk(irqd) ||
cfg->vector == old_cfg.vector ||
old_cfg.vector == MANAGED_IRQ_SHUTDOWN_VECTOR ||
+ !irqd_is_started(irqd) ||
cfg->dest_apicid == old_cfg.dest_apicid) {
irq_msi_update_msg(irqd, cfg);
return ret;
@@ -150,7 +152,8 @@ static struct irq_chip pci_msi_controller = {
.irq_ack = irq_chip_ack_parent,
.irq_retrigger = irq_chip_retrigger_hierarchy,
.irq_set_affinity = msi_set_affinity,
- .flags = IRQCHIP_SKIP_SET_WAKE,
+ .flags = IRQCHIP_SKIP_SET_WAKE |
+ IRQCHIP_AFFINITY_PRE_STARTUP,
};
int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
@@ -219,7 +222,8 @@ static struct irq_chip pci_msi_ir_controller = {
.irq_mask = pci_msi_mask_irq,
.irq_ack = irq_chip_ack_parent,
.irq_retrigger = irq_chip_retrigger_hierarchy,
- .flags = IRQCHIP_SKIP_SET_WAKE,
+ .flags = IRQCHIP_SKIP_SET_WAKE |
+ IRQCHIP_AFFINITY_PRE_STARTUP,
};
static struct msi_domain_info pci_msi_ir_domain_info = {
@@ -273,7 +277,8 @@ static struct irq_chip dmar_msi_controller = {
.irq_retrigger = irq_chip_retrigger_hierarchy,
.irq_compose_msi_msg = dmar_msi_compose_msg,
.irq_write_msi_msg = dmar_msi_write_msg,
- .flags = IRQCHIP_SKIP_SET_WAKE,
+ .flags = IRQCHIP_SKIP_SET_WAKE |
+ IRQCHIP_AFFINITY_PRE_STARTUP,
};
static int dmar_msi_init(struct irq_domain *domain,
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index b7c003013d41..2131af9f2fa2 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -438,7 +438,7 @@ static void srat_detect_node(struct cpuinfo_x86 *c)
node = numa_cpu_node(cpu);
if (node == NUMA_NO_NODE)
- node = per_cpu(cpu_llc_id, cpu);
+ node = get_llc_id(cpu);
/*
* On multi-fabric platform (e.g. Numascale NumaChip) a
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 64b805bd6a54..0f8885949e8c 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -79,6 +79,12 @@ EXPORT_SYMBOL(smp_num_siblings);
/* Last level cache ID of each logical CPU */
DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID;
+u16 get_llc_id(unsigned int cpu)
+{
+ return per_cpu(cpu_llc_id, cpu);
+}
+EXPORT_SYMBOL_GPL(get_llc_id);
+
/* correctly size the local cpu masks */
void __init setup_cpu_local_masks(void)
{
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 22791aadc085..8cb7816d03b4 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -817,7 +817,10 @@ log_it:
if (mca_cfg.dont_log_ce && !mce_usable_address(&m))
goto clear_it;
- mce_log(&m);
+ if (flags & MCP_QUEUE_LOG)
+ mce_gen_pool_add(&m);
+ else
+ mce_log(&m);
clear_it:
/*
@@ -1639,10 +1642,12 @@ static void __mcheck_cpu_init_generic(void)
m_fl = MCP_DONTLOG;
/*
- * Log the machine checks left over from the previous reset.
+ * Log the machine checks left over from the previous reset. Log them
+ * only, do not start processing them. That will happen in mcheck_late_init()
+ * when all consumers have been registered on the notifier chain.
*/
bitmap_fill(all_banks, MAX_NR_BANKS);
- machine_check_poll(MCP_UC | m_fl, &all_banks);
+ machine_check_poll(MCP_UC | MCP_QUEUE_LOG | m_fl, &all_banks);
cr4_set_bits(X86_CR4_MCE);
diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
index 4e86d97f9653..0bfc14041bbb 100644
--- a/arch/x86/kernel/cpu/mce/inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -235,7 +235,7 @@ static void __maybe_unused raise_mce(struct mce *m)
unsigned long start;
int cpu;
- get_online_cpus();
+ cpus_read_lock();
cpumask_copy(mce_inject_cpumask, cpu_online_mask);
cpumask_clear_cpu(get_cpu(), mce_inject_cpumask);
for_each_online_cpu(cpu) {
@@ -269,7 +269,7 @@ static void __maybe_unused raise_mce(struct mce *m)
}
raise_local();
put_cpu();
- put_online_cpus();
+ cpus_read_unlock();
} else {
preempt_disable();
raise_local();
@@ -529,7 +529,7 @@ static void do_inject(void)
cpu = get_nbc_for_node(topology_die_id(cpu));
}
- get_online_cpus();
+ cpus_read_lock();
if (!cpu_online(cpu))
goto err;
@@ -553,7 +553,7 @@ static void do_inject(void)
}
err:
- put_online_cpus();
+ cpus_read_unlock();
}
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 6a6318e9590c..efb69be41ab1 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -55,7 +55,7 @@ LIST_HEAD(microcode_cache);
* All non cpu-hotplug-callback call sites use:
*
* - microcode_mutex to synchronize with each other;
- * - get/put_online_cpus() to synchronize with
+ * - cpus_read_lock/unlock() to synchronize with
* the cpu-hotplug-callback call sites.
*
* We guarantee that only a single cpu is being
@@ -431,7 +431,7 @@ static ssize_t microcode_write(struct file *file, const char __user *buf,
return ret;
}
- get_online_cpus();
+ cpus_read_lock();
mutex_lock(&microcode_mutex);
if (do_microcode_update(buf, len) == 0)
@@ -441,7 +441,7 @@ static ssize_t microcode_write(struct file *file, const char __user *buf,
perf_check_microcode();
mutex_unlock(&microcode_mutex);
- put_online_cpus();
+ cpus_read_unlock();
return ret;
}
@@ -629,7 +629,7 @@ static ssize_t reload_store(struct device *dev,
if (val != 1)
return size;
- get_online_cpus();
+ cpus_read_lock();
ret = check_online_cpus();
if (ret)
@@ -644,7 +644,7 @@ static ssize_t reload_store(struct device *dev,
mutex_unlock(&microcode_mutex);
put:
- put_online_cpus();
+ cpus_read_unlock();
if (ret == 0)
ret = size;
@@ -853,14 +853,14 @@ static int __init microcode_init(void)
if (IS_ERR(microcode_pdev))
return PTR_ERR(microcode_pdev);
- get_online_cpus();
+ cpus_read_lock();
mutex_lock(&microcode_mutex);
error = subsys_interface_register(&mc_cpu_interface);
if (!error)
perf_check_microcode();
mutex_unlock(&microcode_mutex);
- put_online_cpus();
+ cpus_read_unlock();
if (error)
goto out_pdev;
@@ -892,13 +892,13 @@ static int __init microcode_init(void)
&cpu_root_microcode_group);
out_driver:
- get_online_cpus();
+ cpus_read_lock();
mutex_lock(&microcode_mutex);
subsys_interface_unregister(&mc_cpu_interface);
mutex_unlock(&microcode_mutex);
- put_online_cpus();
+ cpus_read_unlock();
out_pdev:
platform_device_unregister(microcode_pdev);
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c
index a76694bffe86..2746cac9d8a9 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.c
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.c
@@ -336,7 +336,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
replace = -1;
/* No CPU hotplug when we change MTRR entries */
- get_online_cpus();
+ cpus_read_lock();
/* Search for existing MTRR */
mutex_lock(&mtrr_mutex);
@@ -398,7 +398,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
error = i;
out:
mutex_unlock(&mtrr_mutex);
- put_online_cpus();
+ cpus_read_unlock();
return error;
}
@@ -485,7 +485,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
max = num_var_ranges;
/* No CPU hotplug when we change MTRR entries */
- get_online_cpus();
+ cpus_read_lock();
mutex_lock(&mtrr_mutex);
if (reg < 0) {
/* Search for existing MTRR */
@@ -520,7 +520,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
error = reg;
out:
mutex_unlock(&mtrr_mutex);
- put_online_cpus();
+ cpus_read_unlock();
return error;
}
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 23001ae03e82..4b8813bafffd 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -57,128 +57,57 @@ static void
mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m,
struct rdt_resource *r);
-#define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].domains)
+#define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.domains)
-struct rdt_resource rdt_resources_all[] = {
+struct rdt_hw_resource rdt_resources_all[] = {
[RDT_RESOURCE_L3] =
{
- .rid = RDT_RESOURCE_L3,
- .name = "L3",
- .domains = domain_init(RDT_RESOURCE_L3),
- .msr_base = MSR_IA32_L3_CBM_BASE,
- .msr_update = cat_wrmsr,
- .cache_level = 3,
- .cache = {
- .min_cbm_bits = 1,
- .cbm_idx_mult = 1,
- .cbm_idx_offset = 0,
- },
- .parse_ctrlval = parse_cbm,
- .format_str = "%d=%0*x",
- .fflags = RFTYPE_RES_CACHE,
- },
- [RDT_RESOURCE_L3DATA] =
- {
- .rid = RDT_RESOURCE_L3DATA,
- .name = "L3DATA",
- .domains = domain_init(RDT_RESOURCE_L3DATA),
- .msr_base = MSR_IA32_L3_CBM_BASE,
- .msr_update = cat_wrmsr,
- .cache_level = 3,
- .cache = {
- .min_cbm_bits = 1,
- .cbm_idx_mult = 2,
- .cbm_idx_offset = 0,
+ .r_resctrl = {
+ .rid = RDT_RESOURCE_L3,
+ .name = "L3",
+ .cache_level = 3,
+ .cache = {
+ .min_cbm_bits = 1,
+ },
+ .domains = domain_init(RDT_RESOURCE_L3),
+ .parse_ctrlval = parse_cbm,
+ .format_str = "%d=%0*x",
+ .fflags = RFTYPE_RES_CACHE,
},
- .parse_ctrlval = parse_cbm,
- .format_str = "%d=%0*x",
- .fflags = RFTYPE_RES_CACHE,
- },
- [RDT_RESOURCE_L3CODE] =
- {
- .rid = RDT_RESOURCE_L3CODE,
- .name = "L3CODE",
- .domains = domain_init(RDT_RESOURCE_L3CODE),
.msr_base = MSR_IA32_L3_CBM_BASE,
.msr_update = cat_wrmsr,
- .cache_level = 3,
- .cache = {
- .min_cbm_bits = 1,
- .cbm_idx_mult = 2,
- .cbm_idx_offset = 1,
- },
- .parse_ctrlval = parse_cbm,
- .format_str = "%d=%0*x",
- .fflags = RFTYPE_RES_CACHE,
},
[RDT_RESOURCE_L2] =
{
- .rid = RDT_RESOURCE_L2,
- .name = "L2",
- .domains = domain_init(RDT_RESOURCE_L2),
- .msr_base = MSR_IA32_L2_CBM_BASE,
- .msr_update = cat_wrmsr,
- .cache_level = 2,
- .cache = {
- .min_cbm_bits = 1,
- .cbm_idx_mult = 1,
- .cbm_idx_offset = 0,
+ .r_resctrl = {
+ .rid = RDT_RESOURCE_L2,
+ .name = "L2",
+ .cache_level = 2,
+ .cache = {
+ .min_cbm_bits = 1,
+ },
+ .domains = domain_init(RDT_RESOURCE_L2),
+ .parse_ctrlval = parse_cbm,
+ .format_str = "%d=%0*x",
+ .fflags = RFTYPE_RES_CACHE,
},
- .parse_ctrlval = parse_cbm,
- .format_str = "%d=%0*x",
- .fflags = RFTYPE_RES_CACHE,
- },
- [RDT_RESOURCE_L2DATA] =
- {
- .rid = RDT_RESOURCE_L2DATA,
- .name = "L2DATA",
- .domains = domain_init(RDT_RESOURCE_L2DATA),
.msr_base = MSR_IA32_L2_CBM_BASE,
.msr_update = cat_wrmsr,
- .cache_level = 2,
- .cache = {
- .min_cbm_bits = 1,
- .cbm_idx_mult = 2,
- .cbm_idx_offset = 0,
- },
- .parse_ctrlval = parse_cbm,
- .format_str = "%d=%0*x",
- .fflags = RFTYPE_RES_CACHE,
- },
- [RDT_RESOURCE_L2CODE] =
- {
- .rid = RDT_RESOURCE_L2CODE,
- .name = "L2CODE",
- .domains = domain_init(RDT_RESOURCE_L2CODE),
- .msr_base = MSR_IA32_L2_CBM_BASE,
- .msr_update = cat_wrmsr,
- .cache_level = 2,
- .cache = {
- .min_cbm_bits = 1,
- .cbm_idx_mult = 2,
- .cbm_idx_offset = 1,
- },
- .parse_ctrlval = parse_cbm,
- .format_str = "%d=%0*x",
- .fflags = RFTYPE_RES_CACHE,
},
[RDT_RESOURCE_MBA] =
{
- .rid = RDT_RESOURCE_MBA,
- .name = "MB",
- .domains = domain_init(RDT_RESOURCE_MBA),
- .cache_level = 3,
- .parse_ctrlval = parse_bw,
- .format_str = "%d=%*u",
- .fflags = RFTYPE_RES_MB,
+ .r_resctrl = {
+ .rid = RDT_RESOURCE_MBA,
+ .name = "MB",
+ .cache_level = 3,
+ .domains = domain_init(RDT_RESOURCE_MBA),
+ .parse_ctrlval = parse_bw,
+ .format_str = "%d=%*u",
+ .fflags = RFTYPE_RES_MB,
+ },
},
};
-static unsigned int cbm_idx(struct rdt_resource *r, unsigned int closid)
-{
- return closid * r->cache.cbm_idx_mult + r->cache.cbm_idx_offset;
-}
-
/*
* cache_alloc_hsw_probe() - Have to probe for Intel haswell server CPUs
* as they do not have CPUID enumeration support for Cache allocation.
@@ -199,7 +128,8 @@ static unsigned int cbm_idx(struct rdt_resource *r, unsigned int closid)
*/
static inline void cache_alloc_hsw_probe(void)
{
- struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3];
+ struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_L3];
+ struct rdt_resource *r = &hw_res->r_resctrl;
u32 l, h, max_cbm = BIT_MASK(20) - 1;
if (wrmsr_safe(MSR_IA32_L3_CBM_BASE, max_cbm, 0))
@@ -211,7 +141,7 @@ static inline void cache_alloc_hsw_probe(void)
if (l != max_cbm)
return;
- r->num_closid = 4;
+ hw_res->num_closid = 4;
r->default_ctrl = max_cbm;
r->cache.cbm_len = 20;
r->cache.shareable_bits = 0xc0000;
@@ -225,7 +155,7 @@ static inline void cache_alloc_hsw_probe(void)
bool is_mba_sc(struct rdt_resource *r)
{
if (!r)
- return rdt_resources_all[RDT_RESOURCE_MBA].membw.mba_sc;
+ return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.mba_sc;
return r->membw.mba_sc;
}
@@ -253,12 +183,13 @@ static inline bool rdt_get_mb_table(struct rdt_resource *r)
static bool __get_mem_config_intel(struct rdt_resource *r)
{
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
union cpuid_0x10_3_eax eax;
union cpuid_0x10_x_edx edx;
u32 ebx, ecx, max_delay;
cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full);
- r->num_closid = edx.split.cos_max + 1;
+ hw_res->num_closid = edx.split.cos_max + 1;
max_delay = eax.split.max_delay + 1;
r->default_ctrl = MAX_MBA_BW;
r->membw.arch_needs_linear = true;
@@ -287,12 +218,13 @@ static bool __get_mem_config_intel(struct rdt_resource *r)
static bool __rdt_get_mem_config_amd(struct rdt_resource *r)
{
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
union cpuid_0x10_3_eax eax;
union cpuid_0x10_x_edx edx;
u32 ebx, ecx;
cpuid_count(0x80000020, 1, &eax.full, &ebx, &ecx, &edx.full);
- r->num_closid = edx.split.cos_max + 1;
+ hw_res->num_closid = edx.split.cos_max + 1;
r->default_ctrl = MAX_MBA_BW_AMD;
/* AMD does not use delay */
@@ -317,12 +249,13 @@ static bool __rdt_get_mem_config_amd(struct rdt_resource *r)
static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r)
{
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
union cpuid_0x10_1_eax eax;
union cpuid_0x10_x_edx edx;
u32 ebx, ecx;
cpuid_count(0x00000010, idx, &eax.full, &ebx, &ecx, &edx.full);
- r->num_closid = edx.split.cos_max + 1;
+ hw_res->num_closid = edx.split.cos_max + 1;
r->cache.cbm_len = eax.split.cbm_len + 1;
r->default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1;
r->cache.shareable_bits = ebx & r->default_ctrl;
@@ -331,43 +264,35 @@ static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r)
r->alloc_enabled = true;
}
-static void rdt_get_cdp_config(int level, int type)
+static void rdt_get_cdp_config(int level)
{
- struct rdt_resource *r_l = &rdt_resources_all[level];
- struct rdt_resource *r = &rdt_resources_all[type];
-
- r->num_closid = r_l->num_closid / 2;
- r->cache.cbm_len = r_l->cache.cbm_len;
- r->default_ctrl = r_l->default_ctrl;
- r->cache.shareable_bits = r_l->cache.shareable_bits;
- r->data_width = (r->cache.cbm_len + 3) / 4;
- r->alloc_capable = true;
/*
* By default, CDP is disabled. CDP can be enabled by mount parameter
* "cdp" during resctrl file system mount time.
*/
- r->alloc_enabled = false;
+ rdt_resources_all[level].cdp_enabled = false;
+ rdt_resources_all[level].r_resctrl.cdp_capable = true;
}
static void rdt_get_cdp_l3_config(void)
{
- rdt_get_cdp_config(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA);
- rdt_get_cdp_config(RDT_RESOURCE_L3, RDT_RESOURCE_L3CODE);
+ rdt_get_cdp_config(RDT_RESOURCE_L3);
}
static void rdt_get_cdp_l2_config(void)
{
- rdt_get_cdp_config(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA);
- rdt_get_cdp_config(RDT_RESOURCE_L2, RDT_RESOURCE_L2CODE);
+ rdt_get_cdp_config(RDT_RESOURCE_L2);
}
static void
mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
{
unsigned int i;
+ struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
for (i = m->low; i < m->high; i++)
- wrmsrl(r->msr_base + i, d->ctrl_val[i]);
+ wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
}
/*
@@ -389,19 +314,23 @@ mba_wrmsr_intel(struct rdt_domain *d, struct msr_param *m,
struct rdt_resource *r)
{
unsigned int i;
+ struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
/* Write the delay values for mba. */
for (i = m->low; i < m->high; i++)
- wrmsrl(r->msr_base + i, delay_bw_map(d->ctrl_val[i], r));
+ wrmsrl(hw_res->msr_base + i, delay_bw_map(hw_dom->ctrl_val[i], r));
}
static void
cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
{
unsigned int i;
+ struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
for (i = m->low; i < m->high; i++)
- wrmsrl(r->msr_base + cbm_idx(r, i), d->ctrl_val[i]);
+ wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
}
struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r)
@@ -417,16 +346,22 @@ struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r)
return NULL;
}
+u32 resctrl_arch_get_num_closid(struct rdt_resource *r)
+{
+ return resctrl_to_arch_res(r)->num_closid;
+}
+
void rdt_ctrl_update(void *arg)
{
struct msr_param *m = arg;
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
struct rdt_resource *r = m->res;
int cpu = smp_processor_id();
struct rdt_domain *d;
d = get_domain_from_cpu(cpu, r);
if (d) {
- r->msr_update(d, m, r);
+ hw_res->msr_update(d, m, r);
return;
}
pr_warn_once("cpu %d not found in any domain for resource %s\n",
@@ -468,6 +403,7 @@ struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm)
{
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
int i;
/*
@@ -476,7 +412,7 @@ void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm)
* For Memory Allocation: Set b/w requested to 100%
* and the bandwidth in MBps to U32_MAX
*/
- for (i = 0; i < r->num_closid; i++, dc++, dm++) {
+ for (i = 0; i < hw_res->num_closid; i++, dc++, dm++) {
*dc = r->default_ctrl;
*dm = MBA_MAX_MBPS;
}
@@ -484,26 +420,30 @@ void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm)
static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d)
{
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
+ struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
struct msr_param m;
u32 *dc, *dm;
- dc = kmalloc_array(r->num_closid, sizeof(*d->ctrl_val), GFP_KERNEL);
+ dc = kmalloc_array(hw_res->num_closid, sizeof(*hw_dom->ctrl_val),
+ GFP_KERNEL);
if (!dc)
return -ENOMEM;
- dm = kmalloc_array(r->num_closid, sizeof(*d->mbps_val), GFP_KERNEL);
+ dm = kmalloc_array(hw_res->num_closid, sizeof(*hw_dom->mbps_val),
+ GFP_KERNEL);
if (!dm) {
kfree(dc);
return -ENOMEM;
}
- d->ctrl_val = dc;
- d->mbps_val = dm;
+ hw_dom->ctrl_val = dc;
+ hw_dom->mbps_val = dm;
setup_default_ctrlval(r, dc, dm);
m.low = 0;
- m.high = r->num_closid;
- r->msr_update(d, &m, r);
+ m.high = hw_res->num_closid;
+ hw_res->msr_update(d, &m, r);
return 0;
}
@@ -560,6 +500,7 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
{
int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
struct list_head *add_pos = NULL;
+ struct rdt_hw_domain *hw_dom;
struct rdt_domain *d;
d = rdt_find_domain(r, id, &add_pos);
@@ -575,10 +516,11 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
return;
}
- d = kzalloc_node(sizeof(*d), GFP_KERNEL, cpu_to_node(cpu));
- if (!d)
+ hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu));
+ if (!hw_dom)
return;
+ d = &hw_dom->d_resctrl;
d->id = id;
cpumask_set_cpu(cpu, &d->cpu_mask);
@@ -607,6 +549,7 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
static void domain_remove_cpu(int cpu, struct rdt_resource *r)
{
int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
+ struct rdt_hw_domain *hw_dom;
struct rdt_domain *d;
d = rdt_find_domain(r, id, NULL);
@@ -614,6 +557,7 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
pr_warn("Couldn't find cache id for CPU %d\n", cpu);
return;
}
+ hw_dom = resctrl_to_arch_dom(d);
cpumask_clear_cpu(cpu, &d->cpu_mask);
if (cpumask_empty(&d->cpu_mask)) {
@@ -646,16 +590,16 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
if (d->plr)
d->plr->d = NULL;
- kfree(d->ctrl_val);
- kfree(d->mbps_val);
+ kfree(hw_dom->ctrl_val);
+ kfree(hw_dom->mbps_val);
bitmap_free(d->rmid_busy_llc);
kfree(d->mbm_total);
kfree(d->mbm_local);
- kfree(d);
+ kfree(hw_dom);
return;
}
- if (r == &rdt_resources_all[RDT_RESOURCE_L3]) {
+ if (r == &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl) {
if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
cancel_delayed_work(&d->mbm_over);
mbm_setup_overflow_handler(d, 0);
@@ -732,13 +676,8 @@ static int resctrl_offline_cpu(unsigned int cpu)
static __init void rdt_init_padding(void)
{
struct rdt_resource *r;
- int cl;
for_each_alloc_capable_rdt_resource(r) {
- cl = strlen(r->name);
- if (cl > max_name_width)
- max_name_width = cl;
-
if (r->data_width > max_data_width)
max_data_width = r->data_width;
}
@@ -827,19 +766,22 @@ static bool __init rdt_cpu_has(int flag)
static __init bool get_mem_config(void)
{
+ struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_MBA];
+
if (!rdt_cpu_has(X86_FEATURE_MBA))
return false;
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
- return __get_mem_config_intel(&rdt_resources_all[RDT_RESOURCE_MBA]);
+ return __get_mem_config_intel(&hw_res->r_resctrl);
else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
- return __rdt_get_mem_config_amd(&rdt_resources_all[RDT_RESOURCE_MBA]);
+ return __rdt_get_mem_config_amd(&hw_res->r_resctrl);
return false;
}
static __init bool get_rdt_alloc_resources(void)
{
+ struct rdt_resource *r;
bool ret = false;
if (rdt_alloc_capable)
@@ -849,14 +791,16 @@ static __init bool get_rdt_alloc_resources(void)
return false;
if (rdt_cpu_has(X86_FEATURE_CAT_L3)) {
- rdt_get_cache_alloc_cfg(1, &rdt_resources_all[RDT_RESOURCE_L3]);
+ r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+ rdt_get_cache_alloc_cfg(1, r);
if (rdt_cpu_has(X86_FEATURE_CDP_L3))
rdt_get_cdp_l3_config();
ret = true;
}
if (rdt_cpu_has(X86_FEATURE_CAT_L2)) {
/* CPUID 0x10.2 fields are same format at 0x10.1 */
- rdt_get_cache_alloc_cfg(2, &rdt_resources_all[RDT_RESOURCE_L2]);
+ r = &rdt_resources_all[RDT_RESOURCE_L2].r_resctrl;
+ rdt_get_cache_alloc_cfg(2, r);
if (rdt_cpu_has(X86_FEATURE_CDP_L2))
rdt_get_cdp_l2_config();
ret = true;
@@ -870,6 +814,8 @@ static __init bool get_rdt_alloc_resources(void)
static __init bool get_rdt_mon_resources(void)
{
+ struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+
if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC))
rdt_mon_features |= (1 << QOS_L3_OCCUP_EVENT_ID);
if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL))
@@ -880,7 +826,7 @@ static __init bool get_rdt_mon_resources(void)
if (!rdt_mon_features)
return false;
- return !rdt_get_mon_l3_config(&rdt_resources_all[RDT_RESOURCE_L3]);
+ return !rdt_get_mon_l3_config(r);
}
static __init void __check_quirks_intel(void)
@@ -918,42 +864,40 @@ static __init bool get_rdt_resources(void)
static __init void rdt_init_res_defs_intel(void)
{
+ struct rdt_hw_resource *hw_res;
struct rdt_resource *r;
for_each_rdt_resource(r) {
+ hw_res = resctrl_to_arch_res(r);
+
if (r->rid == RDT_RESOURCE_L3 ||
- r->rid == RDT_RESOURCE_L3DATA ||
- r->rid == RDT_RESOURCE_L3CODE ||
- r->rid == RDT_RESOURCE_L2 ||
- r->rid == RDT_RESOURCE_L2DATA ||
- r->rid == RDT_RESOURCE_L2CODE) {
+ r->rid == RDT_RESOURCE_L2) {
r->cache.arch_has_sparse_bitmaps = false;
r->cache.arch_has_empty_bitmaps = false;
r->cache.arch_has_per_cpu_cfg = false;
} else if (r->rid == RDT_RESOURCE_MBA) {
- r->msr_base = MSR_IA32_MBA_THRTL_BASE;
- r->msr_update = mba_wrmsr_intel;
+ hw_res->msr_base = MSR_IA32_MBA_THRTL_BASE;
+ hw_res->msr_update = mba_wrmsr_intel;
}
}
}
static __init void rdt_init_res_defs_amd(void)
{
+ struct rdt_hw_resource *hw_res;
struct rdt_resource *r;
for_each_rdt_resource(r) {
+ hw_res = resctrl_to_arch_res(r);
+
if (r->rid == RDT_RESOURCE_L3 ||
- r->rid == RDT_RESOURCE_L3DATA ||
- r->rid == RDT_RESOURCE_L3CODE ||
- r->rid == RDT_RESOURCE_L2 ||
- r->rid == RDT_RESOURCE_L2DATA ||
- r->rid == RDT_RESOURCE_L2CODE) {
+ r->rid == RDT_RESOURCE_L2) {
r->cache.arch_has_sparse_bitmaps = true;
r->cache.arch_has_empty_bitmaps = true;
r->cache.arch_has_per_cpu_cfg = true;
} else if (r->rid == RDT_RESOURCE_MBA) {
- r->msr_base = MSR_IA32_MBA_BW_BASE;
- r->msr_update = mba_wrmsr_amd;
+ hw_res->msr_base = MSR_IA32_MBA_BW_BASE;
+ hw_res->msr_update = mba_wrmsr_amd;
}
}
}
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index c877642e8a14..87666275eed9 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -57,20 +57,23 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
return true;
}
-int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r,
+int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,
struct rdt_domain *d)
{
+ struct resctrl_staged_config *cfg;
+ struct rdt_resource *r = s->res;
unsigned long bw_val;
- if (d->have_new_ctrl) {
+ cfg = &d->staged_config[s->conf_type];
+ if (cfg->have_new_ctrl) {
rdt_last_cmd_printf("Duplicate domain %d\n", d->id);
return -EINVAL;
}
if (!bw_validate(data->buf, &bw_val, r))
return -EINVAL;
- d->new_ctrl = bw_val;
- d->have_new_ctrl = true;
+ cfg->new_ctrl = bw_val;
+ cfg->have_new_ctrl = true;
return 0;
}
@@ -125,13 +128,16 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
* Read one cache bit mask (hex). Check that it is valid for the current
* resource type.
*/
-int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
+int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s,
struct rdt_domain *d)
{
struct rdtgroup *rdtgrp = data->rdtgrp;
+ struct resctrl_staged_config *cfg;
+ struct rdt_resource *r = s->res;
u32 cbm_val;
- if (d->have_new_ctrl) {
+ cfg = &d->staged_config[s->conf_type];
+ if (cfg->have_new_ctrl) {
rdt_last_cmd_printf("Duplicate domain %d\n", d->id);
return -EINVAL;
}
@@ -160,12 +166,12 @@ int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
* The CBM may not overlap with the CBM of another closid if
* either is exclusive.
*/
- if (rdtgroup_cbm_overlaps(r, d, cbm_val, rdtgrp->closid, true)) {
+ if (rdtgroup_cbm_overlaps(s, d, cbm_val, rdtgrp->closid, true)) {
rdt_last_cmd_puts("Overlaps with exclusive group\n");
return -EINVAL;
}
- if (rdtgroup_cbm_overlaps(r, d, cbm_val, rdtgrp->closid, false)) {
+ if (rdtgroup_cbm_overlaps(s, d, cbm_val, rdtgrp->closid, false)) {
if (rdtgrp->mode == RDT_MODE_EXCLUSIVE ||
rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
rdt_last_cmd_puts("Overlaps with other group\n");
@@ -173,8 +179,8 @@ int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
}
}
- d->new_ctrl = cbm_val;
- d->have_new_ctrl = true;
+ cfg->new_ctrl = cbm_val;
+ cfg->have_new_ctrl = true;
return 0;
}
@@ -185,9 +191,12 @@ int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
* separated by ";". The "id" is in decimal, and must match one of
* the "id"s for this resource.
*/
-static int parse_line(char *line, struct rdt_resource *r,
+static int parse_line(char *line, struct resctrl_schema *s,
struct rdtgroup *rdtgrp)
{
+ enum resctrl_conf_type t = s->conf_type;
+ struct resctrl_staged_config *cfg;
+ struct rdt_resource *r = s->res;
struct rdt_parse_data data;
char *dom = NULL, *id;
struct rdt_domain *d;
@@ -213,9 +222,10 @@ next:
if (d->id == dom_id) {
data.buf = dom;
data.rdtgrp = rdtgrp;
- if (r->parse_ctrlval(&data, r, d))
+ if (r->parse_ctrlval(&data, s, d))
return -EINVAL;
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
+ cfg = &d->staged_config[t];
/*
* In pseudo-locking setup mode and just
* parsed a valid CBM that should be
@@ -224,9 +234,9 @@ next:
* the required initialization for single
* region and return.
*/
- rdtgrp->plr->r = r;
+ rdtgrp->plr->s = s;
rdtgrp->plr->d = d;
- rdtgrp->plr->cbm = d->new_ctrl;
+ rdtgrp->plr->cbm = cfg->new_ctrl;
d->plr = rdtgrp->plr;
return 0;
}
@@ -236,28 +246,72 @@ next:
return -EINVAL;
}
-int update_domains(struct rdt_resource *r, int closid)
+static u32 get_config_index(u32 closid, enum resctrl_conf_type type)
{
+ switch (type) {
+ default:
+ case CDP_NONE:
+ return closid;
+ case CDP_CODE:
+ return closid * 2 + 1;
+ case CDP_DATA:
+ return closid * 2;
+ }
+}
+
+static bool apply_config(struct rdt_hw_domain *hw_dom,
+ struct resctrl_staged_config *cfg, u32 idx,
+ cpumask_var_t cpu_mask, bool mba_sc)
+{
+ struct rdt_domain *dom = &hw_dom->d_resctrl;
+ u32 *dc = !mba_sc ? hw_dom->ctrl_val : hw_dom->mbps_val;
+
+ if (cfg->new_ctrl != dc[idx]) {
+ cpumask_set_cpu(cpumask_any(&dom->cpu_mask), cpu_mask);
+ dc[idx] = cfg->new_ctrl;
+
+ return true;
+ }
+
+ return false;
+}
+
+int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid)
+{
+ struct resctrl_staged_config *cfg;
+ struct rdt_hw_domain *hw_dom;
struct msr_param msr_param;
+ enum resctrl_conf_type t;
cpumask_var_t cpu_mask;
struct rdt_domain *d;
bool mba_sc;
- u32 *dc;
int cpu;
+ u32 idx;
if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
return -ENOMEM;
- msr_param.low = closid;
- msr_param.high = msr_param.low + 1;
- msr_param.res = r;
-
mba_sc = is_mba_sc(r);
+ msr_param.res = NULL;
list_for_each_entry(d, &r->domains, list) {
- dc = !mba_sc ? d->ctrl_val : d->mbps_val;
- if (d->have_new_ctrl && d->new_ctrl != dc[closid]) {
- cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
- dc[closid] = d->new_ctrl;
+ hw_dom = resctrl_to_arch_dom(d);
+ for (t = 0; t < CDP_NUM_TYPES; t++) {
+ cfg = &hw_dom->d_resctrl.staged_config[t];
+ if (!cfg->have_new_ctrl)
+ continue;
+
+ idx = get_config_index(closid, t);
+ if (!apply_config(hw_dom, cfg, idx, cpu_mask, mba_sc))
+ continue;
+
+ if (!msr_param.res) {
+ msr_param.low = idx;
+ msr_param.high = msr_param.low + 1;
+ msr_param.res = r;
+ } else {
+ msr_param.low = min(msr_param.low, idx);
+ msr_param.high = max(msr_param.high, idx + 1);
+ }
}
}
@@ -284,11 +338,11 @@ done:
static int rdtgroup_parse_resource(char *resname, char *tok,
struct rdtgroup *rdtgrp)
{
- struct rdt_resource *r;
+ struct resctrl_schema *s;
- for_each_alloc_enabled_rdt_resource(r) {
- if (!strcmp(resname, r->name) && rdtgrp->closid < r->num_closid)
- return parse_line(tok, r, rdtgrp);
+ list_for_each_entry(s, &resctrl_schema_all, list) {
+ if (!strcmp(resname, s->name) && rdtgrp->closid < s->num_closid)
+ return parse_line(tok, s, rdtgrp);
}
rdt_last_cmd_printf("Unknown or unsupported resource name '%s'\n", resname);
return -EINVAL;
@@ -297,6 +351,7 @@ static int rdtgroup_parse_resource(char *resname, char *tok,
ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
{
+ struct resctrl_schema *s;
struct rdtgroup *rdtgrp;
struct rdt_domain *dom;
struct rdt_resource *r;
@@ -327,9 +382,9 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
goto out;
}
- for_each_alloc_enabled_rdt_resource(r) {
- list_for_each_entry(dom, &r->domains, list)
- dom->have_new_ctrl = false;
+ list_for_each_entry(s, &resctrl_schema_all, list) {
+ list_for_each_entry(dom, &s->res->domains, list)
+ memset(dom->staged_config, 0, sizeof(dom->staged_config));
}
while ((tok = strsep(&buf, "\n")) != NULL) {
@@ -349,8 +404,9 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
goto out;
}
- for_each_alloc_enabled_rdt_resource(r) {
- ret = update_domains(r, rdtgrp->closid);
+ list_for_each_entry(s, &resctrl_schema_all, list) {
+ r = s->res;
+ ret = resctrl_arch_update_domains(r, rdtgrp->closid);
if (ret)
goto out;
}
@@ -371,19 +427,31 @@ out:
return ret ?: nbytes;
}
-static void show_doms(struct seq_file *s, struct rdt_resource *r, int closid)
+u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d,
+ u32 closid, enum resctrl_conf_type type)
+{
+ struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
+ u32 idx = get_config_index(closid, type);
+
+ if (!is_mba_sc(r))
+ return hw_dom->ctrl_val[idx];
+ return hw_dom->mbps_val[idx];
+}
+
+static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int closid)
{
+ struct rdt_resource *r = schema->res;
struct rdt_domain *dom;
bool sep = false;
u32 ctrl_val;
- seq_printf(s, "%*s:", max_name_width, r->name);
+ seq_printf(s, "%*s:", max_name_width, schema->name);
list_for_each_entry(dom, &r->domains, list) {
if (sep)
seq_puts(s, ";");
- ctrl_val = (!is_mba_sc(r) ? dom->ctrl_val[closid] :
- dom->mbps_val[closid]);
+ ctrl_val = resctrl_arch_get_config(r, dom, closid,
+ schema->conf_type);
seq_printf(s, r->format_str, dom->id, max_data_width,
ctrl_val);
sep = true;
@@ -394,16 +462,17 @@ static void show_doms(struct seq_file *s, struct rdt_resource *r, int closid)
int rdtgroup_schemata_show(struct kernfs_open_file *of,
struct seq_file *s, void *v)
{
+ struct resctrl_schema *schema;
struct rdtgroup *rdtgrp;
- struct rdt_resource *r;
int ret = 0;
u32 closid;
rdtgrp = rdtgroup_kn_lock_live(of->kn);
if (rdtgrp) {
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
- for_each_alloc_enabled_rdt_resource(r)
- seq_printf(s, "%s:uninitialized\n", r->name);
+ list_for_each_entry(schema, &resctrl_schema_all, list) {
+ seq_printf(s, "%s:uninitialized\n", schema->name);
+ }
} else if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
if (!rdtgrp->plr->d) {
rdt_last_cmd_clear();
@@ -411,15 +480,15 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of,
ret = -ENODEV;
} else {
seq_printf(s, "%s:%d=%x\n",
- rdtgrp->plr->r->name,
+ rdtgrp->plr->s->res->name,
rdtgrp->plr->d->id,
rdtgrp->plr->cbm);
}
} else {
closid = rdtgrp->closid;
- for_each_alloc_enabled_rdt_resource(r) {
- if (closid < r->num_closid)
- show_doms(s, r, closid);
+ list_for_each_entry(schema, &resctrl_schema_all, list) {
+ if (closid < schema->num_closid)
+ show_doms(s, schema, closid);
}
}
} else {
@@ -449,6 +518,7 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
int rdtgroup_mondata_show(struct seq_file *m, void *arg)
{
struct kernfs_open_file *of = m->private;
+ struct rdt_hw_resource *hw_res;
u32 resid, evtid, domid;
struct rdtgroup *rdtgrp;
struct rdt_resource *r;
@@ -468,7 +538,8 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
domid = md.u.domid;
evtid = md.u.evtid;
- r = &rdt_resources_all[resid];
+ hw_res = &rdt_resources_all[resid];
+ r = &hw_res->r_resctrl;
d = rdt_find_domain(r, domid, NULL);
if (IS_ERR_OR_NULL(d)) {
ret = -ENOENT;
@@ -482,7 +553,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
else if (rr.val & RMID_VAL_UNAVAIL)
seq_puts(m, "Unavailable\n");
else
- seq_printf(m, "%llu\n", rr.val * r->mon_scale);
+ seq_printf(m, "%llu\n", rr.val * hw_res->mon_scale);
out:
rdtgroup_kn_unlock(of->kn);
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 6a5f60a37219..1d647188a43b 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -2,6 +2,7 @@
#ifndef _ASM_X86_RESCTRL_INTERNAL_H
#define _ASM_X86_RESCTRL_INTERNAL_H
+#include <linux/resctrl.h>
#include <linux/sched.h>
#include <linux/kernfs.h>
#include <linux/fs_context.h>
@@ -109,6 +110,7 @@ extern unsigned int resctrl_cqm_threshold;
extern bool rdt_alloc_capable;
extern bool rdt_mon_capable;
extern unsigned int rdt_mon_features;
+extern struct list_head resctrl_schema_all;
enum rdt_group_type {
RDTCTRL_GROUP = 0,
@@ -161,8 +163,8 @@ struct mongroup {
/**
* struct pseudo_lock_region - pseudo-lock region information
- * @r: RDT resource to which this pseudo-locked region
- * belongs
+ * @s: Resctrl schema for the resource to which this
+ * pseudo-locked region belongs
* @d: RDT domain to which this pseudo-locked region
* belongs
* @cbm: bitmask of the pseudo-locked region
@@ -182,7 +184,7 @@ struct mongroup {
* @pm_reqs: Power management QoS requests related to this region
*/
struct pseudo_lock_region {
- struct rdt_resource *r;
+ struct resctrl_schema *s;
struct rdt_domain *d;
u32 cbm;
wait_queue_head_t lock_thread_wq;
@@ -303,44 +305,25 @@ struct mbm_state {
};
/**
- * struct rdt_domain - group of cpus sharing an RDT resource
- * @list: all instances of this resource
- * @id: unique id for this instance
- * @cpu_mask: which cpus share this resource
- * @rmid_busy_llc:
- * bitmap of which limbo RMIDs are above threshold
- * @mbm_total: saved state for MBM total bandwidth
- * @mbm_local: saved state for MBM local bandwidth
- * @mbm_over: worker to periodically read MBM h/w counters
- * @cqm_limbo: worker to periodically read CQM h/w counters
- * @mbm_work_cpu:
- * worker cpu for MBM h/w counters
- * @cqm_work_cpu:
- * worker cpu for CQM h/w counters
+ * struct rdt_hw_domain - Arch private attributes of a set of CPUs that share
+ * a resource
+ * @d_resctrl: Properties exposed to the resctrl file system
* @ctrl_val: array of cache or mem ctrl values (indexed by CLOSID)
* @mbps_val: When mba_sc is enabled, this holds the bandwidth in MBps
- * @new_ctrl: new ctrl value to be loaded
- * @have_new_ctrl: did user provide new_ctrl for this domain
- * @plr: pseudo-locked region (if any) associated with domain
+ *
+ * Members of this structure are accessed via helpers that provide abstraction.
*/
-struct rdt_domain {
- struct list_head list;
- int id;
- struct cpumask cpu_mask;
- unsigned long *rmid_busy_llc;
- struct mbm_state *mbm_total;
- struct mbm_state *mbm_local;
- struct delayed_work mbm_over;
- struct delayed_work cqm_limbo;
- int mbm_work_cpu;
- int cqm_work_cpu;
+struct rdt_hw_domain {
+ struct rdt_domain d_resctrl;
u32 *ctrl_val;
u32 *mbps_val;
- u32 new_ctrl;
- bool have_new_ctrl;
- struct pseudo_lock_region *plr;
};
+static inline struct rdt_hw_domain *resctrl_to_arch_dom(struct rdt_domain *r)
+{
+ return container_of(r, struct rdt_hw_domain, d_resctrl);
+}
+
/**
* struct msr_param - set a range of MSRs from a domain
* @res: The resource to use
@@ -349,69 +332,8 @@ struct rdt_domain {
*/
struct msr_param {
struct rdt_resource *res;
- int low;
- int high;
-};
-
-/**
- * struct rdt_cache - Cache allocation related data
- * @cbm_len: Length of the cache bit mask
- * @min_cbm_bits: Minimum number of consecutive bits to be set
- * @cbm_idx_mult: Multiplier of CBM index
- * @cbm_idx_offset: Offset of CBM index. CBM index is computed by:
- * closid * cbm_idx_multi + cbm_idx_offset
- * in a cache bit mask
- * @shareable_bits: Bitmask of shareable resource with other
- * executing entities
- * @arch_has_sparse_bitmaps: True if a bitmap like f00f is valid.
- * @arch_has_empty_bitmaps: True if the '0' bitmap is valid.
- * @arch_has_per_cpu_cfg: True if QOS_CFG register for this cache
- * level has CPU scope.
- */
-struct rdt_cache {
- unsigned int cbm_len;
- unsigned int min_cbm_bits;
- unsigned int cbm_idx_mult;
- unsigned int cbm_idx_offset;
- unsigned int shareable_bits;
- bool arch_has_sparse_bitmaps;
- bool arch_has_empty_bitmaps;
- bool arch_has_per_cpu_cfg;
-};
-
-/**
- * enum membw_throttle_mode - System's memory bandwidth throttling mode
- * @THREAD_THROTTLE_UNDEFINED: Not relevant to the system
- * @THREAD_THROTTLE_MAX: Memory bandwidth is throttled at the core
- * always using smallest bandwidth percentage
- * assigned to threads, aka "max throttling"
- * @THREAD_THROTTLE_PER_THREAD: Memory bandwidth is throttled at the thread
- */
-enum membw_throttle_mode {
- THREAD_THROTTLE_UNDEFINED = 0,
- THREAD_THROTTLE_MAX,
- THREAD_THROTTLE_PER_THREAD,
-};
-
-/**
- * struct rdt_membw - Memory bandwidth allocation related data
- * @min_bw: Minimum memory bandwidth percentage user can request
- * @bw_gran: Granularity at which the memory bandwidth is allocated
- * @delay_linear: True if memory B/W delay is in linear scale
- * @arch_needs_linear: True if we can't configure non-linear resources
- * @throttle_mode: Bandwidth throttling mode when threads request
- * different memory bandwidths
- * @mba_sc: True if MBA software controller(mba_sc) is enabled
- * @mb_map: Mapping of memory B/W percentage to memory B/W delay
- */
-struct rdt_membw {
- u32 min_bw;
- u32 bw_gran;
- u32 delay_linear;
- bool arch_needs_linear;
- enum membw_throttle_mode throttle_mode;
- bool mba_sc;
- u32 *mb_map;
+ u32 low;
+ u32 high;
};
static inline bool is_llc_occupancy_enabled(void)
@@ -446,111 +368,103 @@ struct rdt_parse_data {
};
/**
- * struct rdt_resource - attributes of an RDT resource
- * @rid: The index of the resource
- * @alloc_enabled: Is allocation enabled on this machine
- * @mon_enabled: Is monitoring enabled for this feature
- * @alloc_capable: Is allocation available on this machine
- * @mon_capable: Is monitor feature available on this machine
- * @name: Name to use in "schemata" file
- * @num_closid: Number of CLOSIDs available
- * @cache_level: Which cache level defines scope of this resource
- * @default_ctrl: Specifies default cache cbm or memory B/W percent.
+ * struct rdt_hw_resource - arch private attributes of a resctrl resource
+ * @r_resctrl: Attributes of the resource used directly by resctrl.
+ * @num_closid: Maximum number of closid this hardware can support,
+ * regardless of CDP. This is exposed via
+ * resctrl_arch_get_num_closid() to avoid confusion
+ * with struct resctrl_schema's property of the same name,
+ * which has been corrected for features like CDP.
* @msr_base: Base MSR address for CBMs
* @msr_update: Function pointer to update QOS MSRs
- * @data_width: Character width of data when displaying
- * @domains: All domains for this resource
- * @cache: Cache allocation related data
- * @membw: If the component has bandwidth controls, their properties.
- * @format_str: Per resource format string to show domain value
- * @parse_ctrlval: Per resource function pointer to parse control values
- * @evt_list: List of monitoring events
- * @num_rmid: Number of RMIDs available
* @mon_scale: cqm counter * mon_scale = occupancy in bytes
* @mbm_width: Monitor width, to detect and correct for overflow.
- * @fflags: flags to choose base and info files
+ * @cdp_enabled: CDP state of this resource
+ *
+ * Members of this structure are either private to the architecture
+ * e.g. mbm_width, or accessed via helpers that provide abstraction. e.g.
+ * msr_update and msr_base.
*/
-struct rdt_resource {
- int rid;
- bool alloc_enabled;
- bool mon_enabled;
- bool alloc_capable;
- bool mon_capable;
- char *name;
- int num_closid;
- int cache_level;
- u32 default_ctrl;
+struct rdt_hw_resource {
+ struct rdt_resource r_resctrl;
+ u32 num_closid;
unsigned int msr_base;
void (*msr_update) (struct rdt_domain *d, struct msr_param *m,
struct rdt_resource *r);
- int data_width;
- struct list_head domains;
- struct rdt_cache cache;
- struct rdt_membw membw;
- const char *format_str;
- int (*parse_ctrlval)(struct rdt_parse_data *data,
- struct rdt_resource *r,
- struct rdt_domain *d);
- struct list_head evt_list;
- int num_rmid;
unsigned int mon_scale;
unsigned int mbm_width;
- unsigned long fflags;
+ bool cdp_enabled;
};
-int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
+static inline struct rdt_hw_resource *resctrl_to_arch_res(struct rdt_resource *r)
+{
+ return container_of(r, struct rdt_hw_resource, r_resctrl);
+}
+
+int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s,
struct rdt_domain *d);
-int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r,
+int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,
struct rdt_domain *d);
extern struct mutex rdtgroup_mutex;
-extern struct rdt_resource rdt_resources_all[];
+extern struct rdt_hw_resource rdt_resources_all[];
extern struct rdtgroup rdtgroup_default;
DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
extern struct dentry *debugfs_resctrl;
-enum {
+enum resctrl_res_level {
RDT_RESOURCE_L3,
- RDT_RESOURCE_L3DATA,
- RDT_RESOURCE_L3CODE,
RDT_RESOURCE_L2,
- RDT_RESOURCE_L2DATA,
- RDT_RESOURCE_L2CODE,
RDT_RESOURCE_MBA,
/* Must be the last */
RDT_NUM_RESOURCES,
};
+static inline struct rdt_resource *resctrl_inc(struct rdt_resource *res)
+{
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(res);
+
+ hw_res++;
+ return &hw_res->r_resctrl;
+}
+
+static inline bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l)
+{
+ return rdt_resources_all[l].cdp_enabled;
+}
+
+int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable);
+
+/*
+ * To return the common struct rdt_resource, which is contained in struct
+ * rdt_hw_resource, walk the resctrl member of struct rdt_hw_resource.
+ */
#define for_each_rdt_resource(r) \
- for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
- r++)
+ for (r = &rdt_resources_all[0].r_resctrl; \
+ r <= &rdt_resources_all[RDT_NUM_RESOURCES - 1].r_resctrl; \
+ r = resctrl_inc(r))
#define for_each_capable_rdt_resource(r) \
- for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
- r++) \
+ for_each_rdt_resource(r) \
if (r->alloc_capable || r->mon_capable)
#define for_each_alloc_capable_rdt_resource(r) \
- for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
- r++) \
+ for_each_rdt_resource(r) \
if (r->alloc_capable)
#define for_each_mon_capable_rdt_resource(r) \
- for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
- r++) \
+ for_each_rdt_resource(r) \
if (r->mon_capable)
#define for_each_alloc_enabled_rdt_resource(r) \
- for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
- r++) \
+ for_each_rdt_resource(r) \
if (r->alloc_enabled)
#define for_each_mon_enabled_rdt_resource(r) \
- for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
- r++) \
+ for_each_rdt_resource(r) \
if (r->mon_enabled)
/* CPUID.(EAX=10H, ECX=ResID=1).EAX */
@@ -594,7 +508,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off);
int rdtgroup_schemata_show(struct kernfs_open_file *of,
struct seq_file *s, void *v);
-bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
+bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d,
unsigned long cbm, int closid, bool exclusive);
unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_domain *d,
unsigned long cbm);
@@ -609,7 +523,6 @@ void rdt_pseudo_lock_release(void);
int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp);
void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp);
struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r);
-int update_domains(struct rdt_resource *r, int closid);
int closids_supported(void);
void closid_free(int closid);
int alloc_rmid(void);
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index f07c10b87a87..c9f0f3d63f75 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -174,7 +174,7 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
struct rdt_resource *r;
u32 crmid = 1, nrmid;
- r = &rdt_resources_all[RDT_RESOURCE_L3];
+ r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
/*
* Skip RMID 0 and start from RMID 1 and check all the RMIDs that
@@ -232,7 +232,7 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
int cpu;
u64 val;
- r = &rdt_resources_all[RDT_RESOURCE_L3];
+ r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
entry->busy = 0;
cpu = get_cpu();
@@ -285,15 +285,15 @@ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
return chunks >>= shift;
}
-static int __mon_event_count(u32 rmid, struct rmid_read *rr)
+static u64 __mon_event_count(u32 rmid, struct rmid_read *rr)
{
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(rr->r);
struct mbm_state *m;
u64 chunks, tval;
tval = __rmid_read(rmid, rr->evtid);
if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) {
- rr->val = tval;
- return -EINVAL;
+ return tval;
}
switch (rr->evtid) {
case QOS_L3_OCCUP_EVENT_ID:
@@ -307,10 +307,10 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr)
break;
default:
/*
- * Code would never reach here because
- * an invalid event id would fail the __rmid_read.
+ * Code would never reach here because an invalid
+ * event id would fail the __rmid_read.
*/
- return -EINVAL;
+ return RMID_VAL_ERROR;
}
if (rr->first) {
@@ -319,7 +319,7 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr)
return 0;
}
- chunks = mbm_overflow_count(m->prev_msr, tval, rr->r->mbm_width);
+ chunks = mbm_overflow_count(m->prev_msr, tval, hw_res->mbm_width);
m->chunks += chunks;
m->prev_msr = tval;
@@ -334,7 +334,7 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr)
*/
static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
{
- struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3];
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(rr->r);
struct mbm_state *m = &rr->d->mbm_local[rmid];
u64 tval, cur_bw, chunks;
@@ -342,8 +342,8 @@ static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
return;
- chunks = mbm_overflow_count(m->prev_bw_msr, tval, rr->r->mbm_width);
- cur_bw = (get_corrected_mbm_count(rmid, chunks) * r->mon_scale) >> 20;
+ chunks = mbm_overflow_count(m->prev_bw_msr, tval, hw_res->mbm_width);
+ cur_bw = (get_corrected_mbm_count(rmid, chunks) * hw_res->mon_scale) >> 20;
if (m->delta_comp)
m->delta_bw = abs(cur_bw - m->prev_bw);
@@ -361,23 +361,29 @@ void mon_event_count(void *info)
struct rdtgroup *rdtgrp, *entry;
struct rmid_read *rr = info;
struct list_head *head;
+ u64 ret_val;
rdtgrp = rr->rgrp;
- if (__mon_event_count(rdtgrp->mon.rmid, rr))
- return;
+ ret_val = __mon_event_count(rdtgrp->mon.rmid, rr);
/*
- * For Ctrl groups read data from child monitor groups.
+ * For Ctrl groups read data from child monitor groups and
+ * add them together. Count events which are read successfully.
+ * Discard the rmid_read's reporting errors.
*/
head = &rdtgrp->mon.crdtgrp_list;
if (rdtgrp->type == RDTCTRL_GROUP) {
list_for_each_entry(entry, head, mon.crdtgrp_list) {
- if (__mon_event_count(entry->mon.rmid, rr))
- return;
+ if (__mon_event_count(entry->mon.rmid, rr) == 0)
+ ret_val = 0;
}
}
+
+ /* Report error if none of rmid_reads are successful */
+ if (ret_val)
+ rr->val = ret_val;
}
/*
@@ -416,6 +422,8 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
{
u32 closid, rmid, cur_msr, cur_msr_val, new_msr_val;
struct mbm_state *pmbm_data, *cmbm_data;
+ struct rdt_hw_resource *hw_r_mba;
+ struct rdt_hw_domain *hw_dom_mba;
u32 cur_bw, delta_bw, user_bw;
struct rdt_resource *r_mba;
struct rdt_domain *dom_mba;
@@ -425,7 +433,8 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
if (!is_mbm_local_enabled())
return;
- r_mba = &rdt_resources_all[RDT_RESOURCE_MBA];
+ hw_r_mba = &rdt_resources_all[RDT_RESOURCE_MBA];
+ r_mba = &hw_r_mba->r_resctrl;
closid = rgrp->closid;
rmid = rgrp->mon.rmid;
pmbm_data = &dom_mbm->mbm_local[rmid];
@@ -435,11 +444,16 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
pr_warn_once("Failure to get domain for MBA update\n");
return;
}
+ hw_dom_mba = resctrl_to_arch_dom(dom_mba);
cur_bw = pmbm_data->prev_bw;
- user_bw = dom_mba->mbps_val[closid];
+ user_bw = resctrl_arch_get_config(r_mba, dom_mba, closid, CDP_NONE);
delta_bw = pmbm_data->delta_bw;
- cur_msr_val = dom_mba->ctrl_val[closid];
+ /*
+ * resctrl_arch_get_config() chooses the mbps/ctrl value to return
+ * based on is_mba_sc(). For now, reach into the hw_dom.
+ */
+ cur_msr_val = hw_dom_mba->ctrl_val[closid];
/*
* For Ctrl groups read data from child monitor groups.
@@ -474,9 +488,9 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
return;
}
- cur_msr = r_mba->msr_base + closid;
+ cur_msr = hw_r_mba->msr_base + closid;
wrmsrl(cur_msr, delay_bw_map(new_msr_val, r_mba));
- dom_mba->ctrl_val[closid] = new_msr_val;
+ hw_dom_mba->ctrl_val[closid] = new_msr_val;
/*
* Delta values are updated dynamically package wise for each
@@ -538,7 +552,7 @@ void cqm_handle_limbo(struct work_struct *work)
mutex_lock(&rdtgroup_mutex);
- r = &rdt_resources_all[RDT_RESOURCE_L3];
+ r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
d = container_of(work, struct rdt_domain, cqm_limbo.work);
__check_limbo(d, false);
@@ -574,7 +588,7 @@ void mbm_handle_overflow(struct work_struct *work)
if (!static_branch_likely(&rdt_mon_enable_key))
goto out_unlock;
- r = &rdt_resources_all[RDT_RESOURCE_L3];
+ r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
d = container_of(work, struct rdt_domain, mbm_over.work);
list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
@@ -671,15 +685,16 @@ static void l3_mon_evt_init(struct rdt_resource *r)
int rdt_get_mon_l3_config(struct rdt_resource *r)
{
unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset;
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
unsigned int cl_size = boot_cpu_data.x86_cache_size;
int ret;
- r->mon_scale = boot_cpu_data.x86_cache_occ_scale;
+ hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale;
r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1;
- r->mbm_width = MBM_CNTR_WIDTH_BASE;
+ hw_res->mbm_width = MBM_CNTR_WIDTH_BASE;
if (mbm_offset > 0 && mbm_offset <= MBM_CNTR_WIDTH_OFFSET_MAX)
- r->mbm_width += mbm_offset;
+ hw_res->mbm_width += mbm_offset;
else if (mbm_offset > MBM_CNTR_WIDTH_OFFSET_MAX)
pr_warn("Ignoring impossible MBM counter offset\n");
@@ -693,7 +708,7 @@ int rdt_get_mon_l3_config(struct rdt_resource *r)
resctrl_cqm_threshold = cl_size * 1024 / r->num_rmid;
/* h/w works in units of "boot_cpu_data.x86_cache_occ_scale" */
- resctrl_cqm_threshold /= r->mon_scale;
+ resctrl_cqm_threshold /= hw_res->mon_scale;
ret = dom_data_init(r);
if (ret)
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index 2207916cae65..db813f819ad6 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -250,7 +250,7 @@ static void pseudo_lock_region_clear(struct pseudo_lock_region *plr)
plr->line_size = 0;
kfree(plr->kmem);
plr->kmem = NULL;
- plr->r = NULL;
+ plr->s = NULL;
if (plr->d)
plr->d->plr = NULL;
plr->d = NULL;
@@ -294,10 +294,10 @@ static int pseudo_lock_region_init(struct pseudo_lock_region *plr)
ci = get_cpu_cacheinfo(plr->cpu);
- plr->size = rdtgroup_cbm_to_size(plr->r, plr->d, plr->cbm);
+ plr->size = rdtgroup_cbm_to_size(plr->s->res, plr->d, plr->cbm);
for (i = 0; i < ci->num_leaves; i++) {
- if (ci->info_list[i].level == plr->r->cache_level) {
+ if (ci->info_list[i].level == plr->s->res->cache_level) {
plr->line_size = ci->info_list[i].coherency_line_size;
return 0;
}
@@ -688,8 +688,8 @@ int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp)
* resource, the portion of cache used by it should be made
* unavailable to all future allocations from both resources.
*/
- if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled ||
- rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled) {
+ if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3) ||
+ resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2)) {
rdt_last_cmd_puts("CDP enabled\n");
return -EINVAL;
}
@@ -800,7 +800,7 @@ bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm
unsigned long cbm_b;
if (d->plr) {
- cbm_len = d->plr->r->cache.cbm_len;
+ cbm_len = d->plr->s->res->cache.cbm_len;
cbm_b = d->plr->cbm;
if (bitmap_intersects(&cbm, &cbm_b, cbm_len))
return true;
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 01fd30e7829d..b57b3db9a6a7 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -39,6 +39,9 @@ static struct kernfs_root *rdt_root;
struct rdtgroup rdtgroup_default;
LIST_HEAD(rdt_all_groups);
+/* list of entries for the schemata file */
+LIST_HEAD(resctrl_schema_all);
+
/* Kernel fs node for "info" directory under root */
static struct kernfs_node *kn_info;
@@ -100,12 +103,12 @@ int closids_supported(void)
static void closid_init(void)
{
- struct rdt_resource *r;
- int rdt_min_closid = 32;
+ struct resctrl_schema *s;
+ u32 rdt_min_closid = 32;
/* Compute rdt_min_closid across all resources */
- for_each_alloc_enabled_rdt_resource(r)
- rdt_min_closid = min(rdt_min_closid, r->num_closid);
+ list_for_each_entry(s, &resctrl_schema_all, list)
+ rdt_min_closid = min(rdt_min_closid, s->num_closid);
closid_free_map = BIT_MASK(rdt_min_closid) - 1;
@@ -842,16 +845,17 @@ static int rdt_last_cmd_status_show(struct kernfs_open_file *of,
static int rdt_num_closids_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
- struct rdt_resource *r = of->kn->parent->priv;
+ struct resctrl_schema *s = of->kn->parent->priv;
- seq_printf(seq, "%d\n", r->num_closid);
+ seq_printf(seq, "%u\n", s->num_closid);
return 0;
}
static int rdt_default_ctrl_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
- struct rdt_resource *r = of->kn->parent->priv;
+ struct resctrl_schema *s = of->kn->parent->priv;
+ struct rdt_resource *r = s->res;
seq_printf(seq, "%x\n", r->default_ctrl);
return 0;
@@ -860,7 +864,8 @@ static int rdt_default_ctrl_show(struct kernfs_open_file *of,
static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
- struct rdt_resource *r = of->kn->parent->priv;
+ struct resctrl_schema *s = of->kn->parent->priv;
+ struct rdt_resource *r = s->res;
seq_printf(seq, "%u\n", r->cache.min_cbm_bits);
return 0;
@@ -869,7 +874,8 @@ static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
static int rdt_shareable_bits_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
- struct rdt_resource *r = of->kn->parent->priv;
+ struct resctrl_schema *s = of->kn->parent->priv;
+ struct rdt_resource *r = s->res;
seq_printf(seq, "%x\n", r->cache.shareable_bits);
return 0;
@@ -892,38 +898,40 @@ static int rdt_shareable_bits_show(struct kernfs_open_file *of,
static int rdt_bit_usage_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
- struct rdt_resource *r = of->kn->parent->priv;
+ struct resctrl_schema *s = of->kn->parent->priv;
/*
* Use unsigned long even though only 32 bits are used to ensure
* test_bit() is used safely.
*/
unsigned long sw_shareable = 0, hw_shareable = 0;
unsigned long exclusive = 0, pseudo_locked = 0;
+ struct rdt_resource *r = s->res;
struct rdt_domain *dom;
int i, hwb, swb, excl, psl;
enum rdtgrp_mode mode;
bool sep = false;
- u32 *ctrl;
+ u32 ctrl_val;
mutex_lock(&rdtgroup_mutex);
hw_shareable = r->cache.shareable_bits;
list_for_each_entry(dom, &r->domains, list) {
if (sep)
seq_putc(seq, ';');
- ctrl = dom->ctrl_val;
sw_shareable = 0;
exclusive = 0;
seq_printf(seq, "%d=", dom->id);
- for (i = 0; i < closids_supported(); i++, ctrl++) {
+ for (i = 0; i < closids_supported(); i++) {
if (!closid_allocated(i))
continue;
+ ctrl_val = resctrl_arch_get_config(r, dom, i,
+ s->conf_type);
mode = rdtgroup_mode_by_closid(i);
switch (mode) {
case RDT_MODE_SHAREABLE:
- sw_shareable |= *ctrl;
+ sw_shareable |= ctrl_val;
break;
case RDT_MODE_EXCLUSIVE:
- exclusive |= *ctrl;
+ exclusive |= ctrl_val;
break;
case RDT_MODE_PSEUDO_LOCKSETUP:
/*
@@ -970,7 +978,8 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
static int rdt_min_bw_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
- struct rdt_resource *r = of->kn->parent->priv;
+ struct resctrl_schema *s = of->kn->parent->priv;
+ struct rdt_resource *r = s->res;
seq_printf(seq, "%u\n", r->membw.min_bw);
return 0;
@@ -1001,7 +1010,8 @@ static int rdt_mon_features_show(struct kernfs_open_file *of,
static int rdt_bw_gran_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
- struct rdt_resource *r = of->kn->parent->priv;
+ struct resctrl_schema *s = of->kn->parent->priv;
+ struct rdt_resource *r = s->res;
seq_printf(seq, "%u\n", r->membw.bw_gran);
return 0;
@@ -1010,7 +1020,8 @@ static int rdt_bw_gran_show(struct kernfs_open_file *of,
static int rdt_delay_linear_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
- struct rdt_resource *r = of->kn->parent->priv;
+ struct resctrl_schema *s = of->kn->parent->priv;
+ struct rdt_resource *r = s->res;
seq_printf(seq, "%u\n", r->membw.delay_linear);
return 0;
@@ -1020,8 +1031,9 @@ static int max_threshold_occ_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
struct rdt_resource *r = of->kn->parent->priv;
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
- seq_printf(seq, "%u\n", resctrl_cqm_threshold * r->mon_scale);
+ seq_printf(seq, "%u\n", resctrl_cqm_threshold * hw_res->mon_scale);
return 0;
}
@@ -1029,7 +1041,8 @@ static int max_threshold_occ_show(struct kernfs_open_file *of,
static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
- struct rdt_resource *r = of->kn->parent->priv;
+ struct resctrl_schema *s = of->kn->parent->priv;
+ struct rdt_resource *r = s->res;
if (r->membw.throttle_mode == THREAD_THROTTLE_PER_THREAD)
seq_puts(seq, "per-thread\n");
@@ -1042,7 +1055,7 @@ static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of,
static ssize_t max_threshold_occ_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
{
- struct rdt_resource *r = of->kn->parent->priv;
+ struct rdt_hw_resource *hw_res;
unsigned int bytes;
int ret;
@@ -1053,7 +1066,8 @@ static ssize_t max_threshold_occ_write(struct kernfs_open_file *of,
if (bytes > (boot_cpu_data.x86_cache_size * 1024))
return -EINVAL;
- resctrl_cqm_threshold = bytes / r->mon_scale;
+ hw_res = resctrl_to_arch_res(of->kn->parent->priv);
+ resctrl_cqm_threshold = bytes / hw_res->mon_scale;
return nbytes;
}
@@ -1078,76 +1092,17 @@ static int rdtgroup_mode_show(struct kernfs_open_file *of,
return 0;
}
-/**
- * rdt_cdp_peer_get - Retrieve CDP peer if it exists
- * @r: RDT resource to which RDT domain @d belongs
- * @d: Cache instance for which a CDP peer is requested
- * @r_cdp: RDT resource that shares hardware with @r (RDT resource peer)
- * Used to return the result.
- * @d_cdp: RDT domain that shares hardware with @d (RDT domain peer)
- * Used to return the result.
- *
- * RDT resources are managed independently and by extension the RDT domains
- * (RDT resource instances) are managed independently also. The Code and
- * Data Prioritization (CDP) RDT resources, while managed independently,
- * could refer to the same underlying hardware. For example,
- * RDT_RESOURCE_L2CODE and RDT_RESOURCE_L2DATA both refer to the L2 cache.
- *
- * When provided with an RDT resource @r and an instance of that RDT
- * resource @d rdt_cdp_peer_get() will return if there is a peer RDT
- * resource and the exact instance that shares the same hardware.
- *
- * Return: 0 if a CDP peer was found, <0 on error or if no CDP peer exists.
- * If a CDP peer was found, @r_cdp will point to the peer RDT resource
- * and @d_cdp will point to the peer RDT domain.
- */
-static int rdt_cdp_peer_get(struct rdt_resource *r, struct rdt_domain *d,
- struct rdt_resource **r_cdp,
- struct rdt_domain **d_cdp)
+static enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type)
{
- struct rdt_resource *_r_cdp = NULL;
- struct rdt_domain *_d_cdp = NULL;
- int ret = 0;
-
- switch (r->rid) {
- case RDT_RESOURCE_L3DATA:
- _r_cdp = &rdt_resources_all[RDT_RESOURCE_L3CODE];
- break;
- case RDT_RESOURCE_L3CODE:
- _r_cdp = &rdt_resources_all[RDT_RESOURCE_L3DATA];
- break;
- case RDT_RESOURCE_L2DATA:
- _r_cdp = &rdt_resources_all[RDT_RESOURCE_L2CODE];
- break;
- case RDT_RESOURCE_L2CODE:
- _r_cdp = &rdt_resources_all[RDT_RESOURCE_L2DATA];
- break;
+ switch (my_type) {
+ case CDP_CODE:
+ return CDP_DATA;
+ case CDP_DATA:
+ return CDP_CODE;
default:
- ret = -ENOENT;
- goto out;
- }
-
- /*
- * When a new CPU comes online and CDP is enabled then the new
- * RDT domains (if any) associated with both CDP RDT resources
- * are added in the same CPU online routine while the
- * rdtgroup_mutex is held. It should thus not happen for one
- * RDT domain to exist and be associated with its RDT CDP
- * resource but there is no RDT domain associated with the
- * peer RDT CDP resource. Hence the WARN.
- */
- _d_cdp = rdt_find_domain(_r_cdp, d->id, NULL);
- if (WARN_ON(IS_ERR_OR_NULL(_d_cdp))) {
- _r_cdp = NULL;
- _d_cdp = NULL;
- ret = -EINVAL;
+ case CDP_NONE:
+ return CDP_NONE;
}
-
-out:
- *r_cdp = _r_cdp;
- *d_cdp = _d_cdp;
-
- return ret;
}
/**
@@ -1171,11 +1126,11 @@ out:
* Return: false if CBM does not overlap, true if it does.
*/
static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
- unsigned long cbm, int closid, bool exclusive)
+ unsigned long cbm, int closid,
+ enum resctrl_conf_type type, bool exclusive)
{
enum rdtgrp_mode mode;
unsigned long ctrl_b;
- u32 *ctrl;
int i;
/* Check for any overlap with regions used by hardware directly */
@@ -1186,9 +1141,8 @@ static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d
}
/* Check for overlap with other resource groups */
- ctrl = d->ctrl_val;
- for (i = 0; i < closids_supported(); i++, ctrl++) {
- ctrl_b = *ctrl;
+ for (i = 0; i < closids_supported(); i++) {
+ ctrl_b = resctrl_arch_get_config(r, d, i, type);
mode = rdtgroup_mode_by_closid(i);
if (closid_allocated(i) && i != closid &&
mode != RDT_MODE_PSEUDO_LOCKSETUP) {
@@ -1208,7 +1162,7 @@ static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d
/**
* rdtgroup_cbm_overlaps - Does CBM overlap with other use of hardware
- * @r: Resource to which domain instance @d belongs.
+ * @s: Schema for the resource to which domain instance @d belongs.
* @d: The domain instance for which @closid is being tested.
* @cbm: Capacity bitmask being tested.
* @closid: Intended closid for @cbm.
@@ -1226,19 +1180,19 @@ static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d
*
* Return: true if CBM overlap detected, false if there is no overlap
*/
-bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
+bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d,
unsigned long cbm, int closid, bool exclusive)
{
- struct rdt_resource *r_cdp;
- struct rdt_domain *d_cdp;
+ enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
+ struct rdt_resource *r = s->res;
- if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, exclusive))
+ if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, s->conf_type,
+ exclusive))
return true;
- if (rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp) < 0)
+ if (!resctrl_arch_get_cdp_enabled(r->rid))
return false;
-
- return __rdtgroup_cbm_overlaps(r_cdp, d_cdp, cbm, closid, exclusive);
+ return __rdtgroup_cbm_overlaps(r, d, cbm, closid, peer_type, exclusive);
}
/**
@@ -1256,17 +1210,21 @@ bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
{
int closid = rdtgrp->closid;
+ struct resctrl_schema *s;
struct rdt_resource *r;
bool has_cache = false;
struct rdt_domain *d;
+ u32 ctrl;
- for_each_alloc_enabled_rdt_resource(r) {
+ list_for_each_entry(s, &resctrl_schema_all, list) {
+ r = s->res;
if (r->rid == RDT_RESOURCE_MBA)
continue;
has_cache = true;
list_for_each_entry(d, &r->domains, list) {
- if (rdtgroup_cbm_overlaps(r, d, d->ctrl_val[closid],
- rdtgrp->closid, false)) {
+ ctrl = resctrl_arch_get_config(r, d, closid,
+ s->conf_type);
+ if (rdtgroup_cbm_overlaps(s, d, ctrl, closid, false)) {
rdt_last_cmd_puts("Schemata overlaps\n");
return false;
}
@@ -1397,6 +1355,7 @@ unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
static int rdtgroup_size_show(struct kernfs_open_file *of,
struct seq_file *s, void *v)
{
+ struct resctrl_schema *schema;
struct rdtgroup *rdtgrp;
struct rdt_resource *r;
struct rdt_domain *d;
@@ -1418,8 +1377,8 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
ret = -ENODEV;
} else {
seq_printf(s, "%*s:", max_name_width,
- rdtgrp->plr->r->name);
- size = rdtgroup_cbm_to_size(rdtgrp->plr->r,
+ rdtgrp->plr->s->name);
+ size = rdtgroup_cbm_to_size(rdtgrp->plr->s->res,
rdtgrp->plr->d,
rdtgrp->plr->cbm);
seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size);
@@ -1427,18 +1386,19 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
goto out;
}
- for_each_alloc_enabled_rdt_resource(r) {
+ list_for_each_entry(schema, &resctrl_schema_all, list) {
+ r = schema->res;
sep = false;
- seq_printf(s, "%*s:", max_name_width, r->name);
+ seq_printf(s, "%*s:", max_name_width, schema->name);
list_for_each_entry(d, &r->domains, list) {
if (sep)
seq_putc(s, ';');
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
size = 0;
} else {
- ctrl = (!is_mba_sc(r) ?
- d->ctrl_val[rdtgrp->closid] :
- d->mbps_val[rdtgrp->closid]);
+ ctrl = resctrl_arch_get_config(r, d,
+ rdtgrp->closid,
+ schema->conf_type);
if (r->rid == RDT_RESOURCE_MBA)
size = ctrl;
else
@@ -1757,14 +1717,14 @@ int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
return ret;
}
-static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, char *name,
+static int rdtgroup_mkdir_info_resdir(void *priv, char *name,
unsigned long fflags)
{
struct kernfs_node *kn_subdir;
int ret;
kn_subdir = kernfs_create_dir(kn_info, name,
- kn_info->mode, r);
+ kn_info->mode, priv);
if (IS_ERR(kn_subdir))
return PTR_ERR(kn_subdir);
@@ -1781,6 +1741,7 @@ static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, char *name,
static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
{
+ struct resctrl_schema *s;
struct rdt_resource *r;
unsigned long fflags;
char name[32];
@@ -1795,9 +1756,11 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
if (ret)
goto out_destroy;
- for_each_alloc_enabled_rdt_resource(r) {
+ /* loop over enabled controls, these are all alloc_enabled */
+ list_for_each_entry(s, &resctrl_schema_all, list) {
+ r = s->res;
fflags = r->fflags | RF_CTRL_INFO;
- ret = rdtgroup_mkdir_info_resdir(r, r->name, fflags);
+ ret = rdtgroup_mkdir_info_resdir(s, s->name, fflags);
if (ret)
goto out_destroy;
}
@@ -1867,7 +1830,7 @@ static void l2_qos_cfg_update(void *arg)
static inline bool is_mba_linear(void)
{
- return rdt_resources_all[RDT_RESOURCE_MBA].membw.delay_linear;
+ return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.delay_linear;
}
static int set_cache_qos_cfg(int level, bool enable)
@@ -1888,7 +1851,7 @@ static int set_cache_qos_cfg(int level, bool enable)
if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
return -ENOMEM;
- r_l = &rdt_resources_all[level];
+ r_l = &rdt_resources_all[level].r_resctrl;
list_for_each_entry(d, &r_l->domains, list) {
if (r_l->cache.arch_has_per_cpu_cfg)
/* Pick all the CPUs in the domain instance */
@@ -1914,14 +1877,16 @@ static int set_cache_qos_cfg(int level, bool enable)
/* Restore the qos cfg state when a domain comes online */
void rdt_domain_reconfigure_cdp(struct rdt_resource *r)
{
- if (!r->alloc_capable)
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
+
+ if (!r->cdp_capable)
return;
- if (r == &rdt_resources_all[RDT_RESOURCE_L2DATA])
- l2_qos_cfg_update(&r->alloc_enabled);
+ if (r->rid == RDT_RESOURCE_L2)
+ l2_qos_cfg_update(&hw_res->cdp_enabled);
- if (r == &rdt_resources_all[RDT_RESOURCE_L3DATA])
- l3_qos_cfg_update(&r->alloc_enabled);
+ if (r->rid == RDT_RESOURCE_L3)
+ l3_qos_cfg_update(&hw_res->cdp_enabled);
}
/*
@@ -1932,7 +1897,8 @@ void rdt_domain_reconfigure_cdp(struct rdt_resource *r)
*/
static int set_mba_sc(bool mba_sc)
{
- struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA];
+ struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
+ struct rdt_hw_domain *hw_dom;
struct rdt_domain *d;
if (!is_mbm_enabled() || !is_mba_linear() ||
@@ -1940,73 +1906,60 @@ static int set_mba_sc(bool mba_sc)
return -EINVAL;
r->membw.mba_sc = mba_sc;
- list_for_each_entry(d, &r->domains, list)
- setup_default_ctrlval(r, d->ctrl_val, d->mbps_val);
+ list_for_each_entry(d, &r->domains, list) {
+ hw_dom = resctrl_to_arch_dom(d);
+ setup_default_ctrlval(r, hw_dom->ctrl_val, hw_dom->mbps_val);
+ }
return 0;
}
-static int cdp_enable(int level, int data_type, int code_type)
+static int cdp_enable(int level)
{
- struct rdt_resource *r_ldata = &rdt_resources_all[data_type];
- struct rdt_resource *r_lcode = &rdt_resources_all[code_type];
- struct rdt_resource *r_l = &rdt_resources_all[level];
+ struct rdt_resource *r_l = &rdt_resources_all[level].r_resctrl;
int ret;
- if (!r_l->alloc_capable || !r_ldata->alloc_capable ||
- !r_lcode->alloc_capable)
+ if (!r_l->alloc_capable)
return -EINVAL;
ret = set_cache_qos_cfg(level, true);
- if (!ret) {
- r_l->alloc_enabled = false;
- r_ldata->alloc_enabled = true;
- r_lcode->alloc_enabled = true;
- }
+ if (!ret)
+ rdt_resources_all[level].cdp_enabled = true;
+
return ret;
}
-static int cdpl3_enable(void)
+static void cdp_disable(int level)
{
- return cdp_enable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA,
- RDT_RESOURCE_L3CODE);
-}
+ struct rdt_hw_resource *r_hw = &rdt_resources_all[level];
-static int cdpl2_enable(void)
-{
- return cdp_enable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA,
- RDT_RESOURCE_L2CODE);
+ if (r_hw->cdp_enabled) {
+ set_cache_qos_cfg(level, false);
+ r_hw->cdp_enabled = false;
+ }
}
-static void cdp_disable(int level, int data_type, int code_type)
+int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable)
{
- struct rdt_resource *r = &rdt_resources_all[level];
+ struct rdt_hw_resource *hw_res = &rdt_resources_all[l];
- r->alloc_enabled = r->alloc_capable;
+ if (!hw_res->r_resctrl.cdp_capable)
+ return -EINVAL;
- if (rdt_resources_all[data_type].alloc_enabled) {
- rdt_resources_all[data_type].alloc_enabled = false;
- rdt_resources_all[code_type].alloc_enabled = false;
- set_cache_qos_cfg(level, false);
- }
-}
+ if (enable)
+ return cdp_enable(l);
-static void cdpl3_disable(void)
-{
- cdp_disable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA, RDT_RESOURCE_L3CODE);
-}
+ cdp_disable(l);
-static void cdpl2_disable(void)
-{
- cdp_disable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA, RDT_RESOURCE_L2CODE);
+ return 0;
}
static void cdp_disable_all(void)
{
- if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled)
- cdpl3_disable();
- if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled)
- cdpl2_disable();
+ if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3))
+ resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false);
+ if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2))
+ resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false);
}
/*
@@ -2084,10 +2037,10 @@ static int rdt_enable_ctx(struct rdt_fs_context *ctx)
int ret = 0;
if (ctx->enable_cdpl2)
- ret = cdpl2_enable();
+ ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, true);
if (!ret && ctx->enable_cdpl3)
- ret = cdpl3_enable();
+ ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, true);
if (!ret && ctx->enable_mba_mbps)
ret = set_mba_sc(true);
@@ -2095,6 +2048,92 @@ static int rdt_enable_ctx(struct rdt_fs_context *ctx)
return ret;
}
+static int schemata_list_add(struct rdt_resource *r, enum resctrl_conf_type type)
+{
+ struct resctrl_schema *s;
+ const char *suffix = "";
+ int ret, cl;
+
+ s = kzalloc(sizeof(*s), GFP_KERNEL);
+ if (!s)
+ return -ENOMEM;
+
+ s->res = r;
+ s->num_closid = resctrl_arch_get_num_closid(r);
+ if (resctrl_arch_get_cdp_enabled(r->rid))
+ s->num_closid /= 2;
+
+ s->conf_type = type;
+ switch (type) {
+ case CDP_CODE:
+ suffix = "CODE";
+ break;
+ case CDP_DATA:
+ suffix = "DATA";
+ break;
+ case CDP_NONE:
+ suffix = "";
+ break;
+ }
+
+ ret = snprintf(s->name, sizeof(s->name), "%s%s", r->name, suffix);
+ if (ret >= sizeof(s->name)) {
+ kfree(s);
+ return -EINVAL;
+ }
+
+ cl = strlen(s->name);
+
+ /*
+ * If CDP is supported by this resource, but not enabled,
+ * include the suffix. This ensures the tabular format of the
+ * schemata file does not change between mounts of the filesystem.
+ */
+ if (r->cdp_capable && !resctrl_arch_get_cdp_enabled(r->rid))
+ cl += 4;
+
+ if (cl > max_name_width)
+ max_name_width = cl;
+
+ INIT_LIST_HEAD(&s->list);
+ list_add(&s->list, &resctrl_schema_all);
+
+ return 0;
+}
+
+static int schemata_list_create(void)
+{
+ struct rdt_resource *r;
+ int ret = 0;
+
+ for_each_alloc_enabled_rdt_resource(r) {
+ if (resctrl_arch_get_cdp_enabled(r->rid)) {
+ ret = schemata_list_add(r, CDP_CODE);
+ if (ret)
+ break;
+
+ ret = schemata_list_add(r, CDP_DATA);
+ } else {
+ ret = schemata_list_add(r, CDP_NONE);
+ }
+
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+static void schemata_list_destroy(void)
+{
+ struct resctrl_schema *s, *tmp;
+
+ list_for_each_entry_safe(s, tmp, &resctrl_schema_all, list) {
+ list_del(&s->list);
+ kfree(s);
+ }
+}
+
static int rdt_get_tree(struct fs_context *fc)
{
struct rdt_fs_context *ctx = rdt_fc2context(fc);
@@ -2116,11 +2155,17 @@ static int rdt_get_tree(struct fs_context *fc)
if (ret < 0)
goto out_cdp;
+ ret = schemata_list_create();
+ if (ret) {
+ schemata_list_destroy();
+ goto out_mba;
+ }
+
closid_init();
ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
if (ret < 0)
- goto out_mba;
+ goto out_schemata_free;
if (rdt_mon_capable) {
ret = mongroup_create_dir(rdtgroup_default.kn,
@@ -2153,7 +2198,7 @@ static int rdt_get_tree(struct fs_context *fc)
static_branch_enable_cpuslocked(&rdt_enable_key);
if (is_mbm_enabled()) {
- r = &rdt_resources_all[RDT_RESOURCE_L3];
+ r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
list_for_each_entry(dom, &r->domains, list)
mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL);
}
@@ -2170,6 +2215,8 @@ out_mongrp:
kernfs_remove(kn_mongrp);
out_info:
kernfs_remove(kn_info);
+out_schemata_free:
+ schemata_list_destroy();
out_mba:
if (ctx->enable_mba_mbps)
set_mba_sc(false);
@@ -2257,6 +2304,8 @@ static int rdt_init_fs_context(struct fs_context *fc)
static int reset_all_ctrls(struct rdt_resource *r)
{
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
+ struct rdt_hw_domain *hw_dom;
struct msr_param msr_param;
cpumask_var_t cpu_mask;
struct rdt_domain *d;
@@ -2267,7 +2316,7 @@ static int reset_all_ctrls(struct rdt_resource *r)
msr_param.res = r;
msr_param.low = 0;
- msr_param.high = r->num_closid;
+ msr_param.high = hw_res->num_closid;
/*
* Disable resource control for this resource by setting all
@@ -2275,10 +2324,11 @@ static int reset_all_ctrls(struct rdt_resource *r)
* from each domain to update the MSRs below.
*/
list_for_each_entry(d, &r->domains, list) {
+ hw_dom = resctrl_to_arch_dom(d);
cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
- for (i = 0; i < r->num_closid; i++)
- d->ctrl_val[i] = r->default_ctrl;
+ for (i = 0; i < hw_res->num_closid; i++)
+ hw_dom->ctrl_val[i] = r->default_ctrl;
}
cpu = get_cpu();
/* Update CBM on this cpu if it's in cpu_mask. */
@@ -2408,6 +2458,7 @@ static void rdt_kill_sb(struct super_block *sb)
rmdir_all_sub();
rdt_pseudo_lock_release();
rdtgroup_default.mode = RDT_MODE_SHAREABLE;
+ schemata_list_destroy();
static_branch_disable_cpuslocked(&rdt_alloc_enable_key);
static_branch_disable_cpuslocked(&rdt_mon_enable_key);
static_branch_disable_cpuslocked(&rdt_enable_key);
@@ -2642,23 +2693,24 @@ static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r)
* Set the RDT domain up to start off with all usable allocations. That is,
* all shareable and unused bits. All-zero CBM is invalid.
*/
-static int __init_one_rdt_domain(struct rdt_domain *d, struct rdt_resource *r,
+static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s,
u32 closid)
{
- struct rdt_resource *r_cdp = NULL;
- struct rdt_domain *d_cdp = NULL;
+ enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
+ enum resctrl_conf_type t = s->conf_type;
+ struct resctrl_staged_config *cfg;
+ struct rdt_resource *r = s->res;
u32 used_b = 0, unused_b = 0;
unsigned long tmp_cbm;
enum rdtgrp_mode mode;
- u32 peer_ctl, *ctrl;
+ u32 peer_ctl, ctrl_val;
int i;
- rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp);
- d->have_new_ctrl = false;
- d->new_ctrl = r->cache.shareable_bits;
+ cfg = &d->staged_config[t];
+ cfg->have_new_ctrl = false;
+ cfg->new_ctrl = r->cache.shareable_bits;
used_b = r->cache.shareable_bits;
- ctrl = d->ctrl_val;
- for (i = 0; i < closids_supported(); i++, ctrl++) {
+ for (i = 0; i < closids_supported(); i++) {
if (closid_allocated(i) && i != closid) {
mode = rdtgroup_mode_by_closid(i);
if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
@@ -2673,35 +2725,38 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct rdt_resource *r,
* usage to ensure there is no overlap
* with an exclusive group.
*/
- if (d_cdp)
- peer_ctl = d_cdp->ctrl_val[i];
+ if (resctrl_arch_get_cdp_enabled(r->rid))
+ peer_ctl = resctrl_arch_get_config(r, d, i,
+ peer_type);
else
peer_ctl = 0;
- used_b |= *ctrl | peer_ctl;
+ ctrl_val = resctrl_arch_get_config(r, d, i,
+ s->conf_type);
+ used_b |= ctrl_val | peer_ctl;
if (mode == RDT_MODE_SHAREABLE)
- d->new_ctrl |= *ctrl | peer_ctl;
+ cfg->new_ctrl |= ctrl_val | peer_ctl;
}
}
if (d->plr && d->plr->cbm > 0)
used_b |= d->plr->cbm;
unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1);
unused_b &= BIT_MASK(r->cache.cbm_len) - 1;
- d->new_ctrl |= unused_b;
+ cfg->new_ctrl |= unused_b;
/*
* Force the initial CBM to be valid, user can
* modify the CBM based on system availability.
*/
- d->new_ctrl = cbm_ensure_valid(d->new_ctrl, r);
+ cfg->new_ctrl = cbm_ensure_valid(cfg->new_ctrl, r);
/*
* Assign the u32 CBM to an unsigned long to ensure that
* bitmap_weight() does not access out-of-bound memory.
*/
- tmp_cbm = d->new_ctrl;
+ tmp_cbm = cfg->new_ctrl;
if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) {
- rdt_last_cmd_printf("No space on %s:%d\n", r->name, d->id);
+ rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->id);
return -ENOSPC;
}
- d->have_new_ctrl = true;
+ cfg->have_new_ctrl = true;
return 0;
}
@@ -2716,13 +2771,13 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct rdt_resource *r,
* If there are no more shareable bits available on any domain then
* the entire allocation will fail.
*/
-static int rdtgroup_init_cat(struct rdt_resource *r, u32 closid)
+static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid)
{
struct rdt_domain *d;
int ret;
- list_for_each_entry(d, &r->domains, list) {
- ret = __init_one_rdt_domain(d, r, closid);
+ list_for_each_entry(d, &s->res->domains, list) {
+ ret = __init_one_rdt_domain(d, s, closid);
if (ret < 0)
return ret;
}
@@ -2733,30 +2788,34 @@ static int rdtgroup_init_cat(struct rdt_resource *r, u32 closid)
/* Initialize MBA resource with default values. */
static void rdtgroup_init_mba(struct rdt_resource *r)
{
+ struct resctrl_staged_config *cfg;
struct rdt_domain *d;
list_for_each_entry(d, &r->domains, list) {
- d->new_ctrl = is_mba_sc(r) ? MBA_MAX_MBPS : r->default_ctrl;
- d->have_new_ctrl = true;
+ cfg = &d->staged_config[CDP_NONE];
+ cfg->new_ctrl = is_mba_sc(r) ? MBA_MAX_MBPS : r->default_ctrl;
+ cfg->have_new_ctrl = true;
}
}
/* Initialize the RDT group's allocations. */
static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
{
+ struct resctrl_schema *s;
struct rdt_resource *r;
int ret;
- for_each_alloc_enabled_rdt_resource(r) {
+ list_for_each_entry(s, &resctrl_schema_all, list) {
+ r = s->res;
if (r->rid == RDT_RESOURCE_MBA) {
rdtgroup_init_mba(r);
} else {
- ret = rdtgroup_init_cat(r, rdtgrp->closid);
+ ret = rdtgroup_init_cat(s, rdtgrp->closid);
if (ret < 0)
return ret;
}
- ret = update_domains(r, rdtgrp->closid);
+ ret = resctrl_arch_update_domains(r, rdtgrp->closid);
if (ret < 0) {
rdt_last_cmd_puts("Failed to initialize allocations\n");
return ret;
@@ -3124,13 +3183,13 @@ out:
static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
{
- if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled)
+ if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3))
seq_puts(seq, ",cdp");
- if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled)
+ if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2))
seq_puts(seq, ",cdpl2");
- if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA]))
+ if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl))
seq_puts(seq, ",mba_MBps");
return 0;
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 08651a4e6aa0..42fc41dd0e1f 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -508,7 +508,7 @@ static struct irq_chip hpet_msi_controller __ro_after_init = {
.irq_set_affinity = msi_domain_set_affinity,
.irq_retrigger = irq_chip_retrigger_hierarchy,
.irq_write_msi_msg = hpet_msi_write_msg,
- .flags = IRQCHIP_SKIP_SET_WAKE,
+ .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_AFFINITY_PRE_STARTUP,
};
static int hpet_msi_init(struct irq_domain *domain,
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 739be5da3bca..fe03bd978761 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -208,30 +208,6 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
kvm_mmu_after_set_cpuid(vcpu);
}
-static int is_efer_nx(void)
-{
- return host_efer & EFER_NX;
-}
-
-static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
-{
- int i;
- struct kvm_cpuid_entry2 *e, *entry;
-
- entry = NULL;
- for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
- e = &vcpu->arch.cpuid_entries[i];
- if (e->function == 0x80000001) {
- entry = e;
- break;
- }
- }
- if (entry && cpuid_entry_has(entry, X86_FEATURE_NX) && !is_efer_nx()) {
- cpuid_entry_clear(entry, X86_FEATURE_NX);
- printk(KERN_INFO "kvm: guest NX capability removed\n");
- }
-}
-
int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
@@ -302,7 +278,6 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
vcpu->arch.cpuid_entries = e2;
vcpu->arch.cpuid_nent = cpuid->nent;
- cpuid_fix_nx_cap(vcpu);
kvm_update_cpuid_runtime(vcpu);
kvm_vcpu_after_set_cpuid(vcpu);
@@ -401,7 +376,6 @@ static __always_inline void kvm_cpu_cap_mask(enum cpuid_leafs leaf, u32 mask)
void kvm_set_cpu_caps(void)
{
- unsigned int f_nx = is_efer_nx() ? F(NX) : 0;
#ifdef CONFIG_X86_64
unsigned int f_gbpages = F(GBPAGES);
unsigned int f_lm = F(LM);
@@ -515,7 +489,7 @@ void kvm_set_cpu_caps(void)
F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
F(PAT) | F(PSE36) | 0 /* Reserved */ |
- f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
+ F(NX) | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
F(FXSR) | F(FXSR_OPT) | f_gbpages | F(RDTSCP) |
0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW)
);
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index b07592ca92f0..41d2a53c5dea 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1933,7 +1933,7 @@ ret_success:
void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *entry;
- struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
+ struct kvm_vcpu_hv *hv_vcpu;
entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_INTERFACE, 0);
if (entry && entry->eax == HYPERV_CPUID_SIGNATURE_EAX) {
@@ -2016,6 +2016,7 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result)
{
+ trace_kvm_hv_hypercall_done(result);
kvm_hv_hypercall_set_result(vcpu, result);
++vcpu->stat.hypercalls;
return kvm_skip_emulated_instruction(vcpu);
@@ -2139,6 +2140,7 @@ static bool hv_check_hypercall_access(struct kvm_vcpu_hv *hv_vcpu, u16 code)
int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
{
+ struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
struct kvm_hv_hcall hc;
u64 ret = HV_STATUS_SUCCESS;
@@ -2173,17 +2175,25 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
hc.rep_idx = (hc.param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff;
hc.rep = !!(hc.rep_cnt || hc.rep_idx);
- if (hc.fast && is_xmm_fast_hypercall(&hc))
- kvm_hv_hypercall_read_xmm(&hc);
-
trace_kvm_hv_hypercall(hc.code, hc.fast, hc.rep_cnt, hc.rep_idx,
hc.ingpa, hc.outgpa);
- if (unlikely(!hv_check_hypercall_access(to_hv_vcpu(vcpu), hc.code))) {
+ if (unlikely(!hv_check_hypercall_access(hv_vcpu, hc.code))) {
ret = HV_STATUS_ACCESS_DENIED;
goto hypercall_complete;
}
+ if (hc.fast && is_xmm_fast_hypercall(&hc)) {
+ if (unlikely(hv_vcpu->enforce_cpuid &&
+ !(hv_vcpu->cpuid_cache.features_edx &
+ HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE))) {
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
+ }
+
+ kvm_hv_hypercall_read_xmm(&hc);
+ }
+
switch (hc.code) {
case HVCALL_NOTIFY_LONG_SPIN_WAIT:
if (unlikely(hc.rep)) {
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 698969e18fe3..ff005fe738a4 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -96,7 +96,7 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic)
{
ioapic->rtc_status.pending_eoi = 0;
- bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_ID);
+ bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_ID + 1);
}
static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic);
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index 660401700075..11e4065e1617 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -43,13 +43,13 @@ struct kvm_vcpu;
struct dest_map {
/* vcpu bitmap where IRQ has been sent */
- DECLARE_BITMAP(map, KVM_MAX_VCPU_ID);
+ DECLARE_BITMAP(map, KVM_MAX_VCPU_ID + 1);
/*
* Vector sent to a given vcpu, only valid when
* the vcpu's bit in map is set
*/
- u8 vectors[KVM_MAX_VCPU_ID];
+ u8 vectors[KVM_MAX_VCPU_ID + 1];
};
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 66f7f5bc3482..47b765270239 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1644,7 +1644,7 @@ static int is_empty_shadow_page(u64 *spt)
* aggregate version in order to make the slab shrinker
* faster
*/
-static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, unsigned long nr)
+static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, long nr)
{
kvm->arch.n_used_mmu_pages += nr;
percpu_counter_add(&kvm_total_used_mmu_pages, nr);
@@ -2535,6 +2535,7 @@ static void kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, bool can_unsync)
{
struct kvm_mmu_page *sp;
+ bool locked = false;
/*
* Force write-protection if the page is being tracked. Note, the page
@@ -2557,9 +2558,34 @@ int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, bool can_unsync)
if (sp->unsync)
continue;
+ /*
+ * TDP MMU page faults require an additional spinlock as they
+ * run with mmu_lock held for read, not write, and the unsync
+ * logic is not thread safe. Take the spinklock regardless of
+ * the MMU type to avoid extra conditionals/parameters, there's
+ * no meaningful penalty if mmu_lock is held for write.
+ */
+ if (!locked) {
+ locked = true;
+ spin_lock(&vcpu->kvm->arch.mmu_unsync_pages_lock);
+
+ /*
+ * Recheck after taking the spinlock, a different vCPU
+ * may have since marked the page unsync. A false
+ * positive on the unprotected check above is not
+ * possible as clearing sp->unsync _must_ hold mmu_lock
+ * for write, i.e. unsync cannot transition from 0->1
+ * while this CPU holds mmu_lock for read (or write).
+ */
+ if (READ_ONCE(sp->unsync))
+ continue;
+ }
+
WARN_ON(sp->role.level != PG_LEVEL_4K);
kvm_unsync_page(vcpu, sp);
}
+ if (locked)
+ spin_unlock(&vcpu->kvm->arch.mmu_unsync_pages_lock);
/*
* We need to ensure that the marking of unsync pages is visible
@@ -5537,6 +5563,8 @@ void kvm_mmu_init_vm(struct kvm *kvm)
{
struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
+ spin_lock_init(&kvm->arch.mmu_unsync_pages_lock);
+
if (!kvm_mmu_init_tdp_mmu(kvm))
/*
* No smp_load/store wrappers needed here as we are in
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 0853370bd811..d80cb122b5f3 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -43,6 +43,7 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
if (!kvm->arch.tdp_mmu_enabled)
return;
+ WARN_ON(!list_empty(&kvm->arch.tdp_mmu_pages));
WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots));
/*
@@ -81,8 +82,6 @@ static void tdp_mmu_free_sp_rcu_callback(struct rcu_head *head)
void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
bool shared)
{
- gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
-
kvm_lockdep_assert_mmu_lock_held(kvm, shared);
if (!refcount_dec_and_test(&root->tdp_mmu_root_count))
@@ -94,7 +93,7 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
list_del_rcu(&root->link);
spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
- zap_gfn_range(kvm, root, 0, max_gfn, false, false, shared);
+ zap_gfn_range(kvm, root, 0, -1ull, false, false, shared);
call_rcu(&root->rcu_head, tdp_mmu_free_sp_rcu_callback);
}
@@ -724,13 +723,29 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
gfn_t start, gfn_t end, bool can_yield, bool flush,
bool shared)
{
+ gfn_t max_gfn_host = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
+ bool zap_all = (start == 0 && end >= max_gfn_host);
struct tdp_iter iter;
+ /*
+ * No need to try to step down in the iterator when zapping all SPTEs,
+ * zapping the top-level non-leaf SPTEs will recurse on their children.
+ */
+ int min_level = zap_all ? root->role.level : PG_LEVEL_4K;
+
+ /*
+ * Bound the walk at host.MAXPHYADDR, guest accesses beyond that will
+ * hit a #PF(RSVD) and never get to an EPT Violation/Misconfig / #NPF,
+ * and so KVM will never install a SPTE for such addresses.
+ */
+ end = min(end, max_gfn_host);
+
kvm_lockdep_assert_mmu_lock_held(kvm, shared);
rcu_read_lock();
- tdp_root_for_each_pte(iter, root, start, end) {
+ for_each_tdp_pte_min_level(iter, root->spt, root->role.level,
+ min_level, start, end) {
retry:
if (can_yield &&
tdp_mmu_iter_cond_resched(kvm, &iter, flush, shared)) {
@@ -744,9 +759,10 @@ retry:
/*
* If this is a non-last-level SPTE that covers a larger range
* than should be zapped, continue, and zap the mappings at a
- * lower level.
+ * lower level, except when zapping all SPTEs.
*/
- if ((iter.gfn < start ||
+ if (!zap_all &&
+ (iter.gfn < start ||
iter.gfn + KVM_PAGES_PER_HPAGE(iter.level) > end) &&
!is_last_spte(iter.old_spte, iter.level))
continue;
@@ -794,12 +810,11 @@ bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, int as_id, gfn_t start,
void kvm_tdp_mmu_zap_all(struct kvm *kvm)
{
- gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
bool flush = false;
int i;
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
- flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, 0, max_gfn,
+ flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, 0, -1ull,
flush, false);
if (flush)
@@ -838,7 +853,6 @@ static struct kvm_mmu_page *next_invalidated_root(struct kvm *kvm,
*/
void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm)
{
- gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
struct kvm_mmu_page *next_root;
struct kvm_mmu_page *root;
bool flush = false;
@@ -854,8 +868,7 @@ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm)
rcu_read_unlock();
- flush = zap_gfn_range(kvm, root, 0, max_gfn, true, flush,
- true);
+ flush = zap_gfn_range(kvm, root, 0, -1ull, true, flush, true);
/*
* Put the reference acquired in
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 1d01da64c333..a8ad78a2faa1 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -646,7 +646,7 @@ out:
void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- struct vmcb *vmcb = svm->vmcb;
+ struct vmcb *vmcb = svm->vmcb01.ptr;
bool activated = kvm_vcpu_apicv_active(vcpu);
if (!enable_apicv)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 3bd09c50c98b..e5515477c30a 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -158,6 +158,9 @@ void recalc_intercepts(struct vcpu_svm *svm)
/* If SMI is not intercepted, ignore guest SMI intercept as well */
if (!intercept_smi)
vmcb_clr_intercept(c, INTERCEPT_SMI);
+
+ vmcb_set_intercept(c, INTERCEPT_VMLOAD);
+ vmcb_set_intercept(c, INTERCEPT_VMSAVE);
}
static void copy_vmcb_control_area(struct vmcb_control_area *dst,
@@ -503,7 +506,11 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
{
- const u32 mask = V_INTR_MASKING_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK;
+ const u32 int_ctl_vmcb01_bits =
+ V_INTR_MASKING_MASK | V_GIF_MASK | V_GIF_ENABLE_MASK;
+
+ const u32 int_ctl_vmcb12_bits = V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK;
+
struct kvm_vcpu *vcpu = &svm->vcpu;
/*
@@ -515,7 +522,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
* Also covers avic_vapic_bar, avic_backing_page, avic_logical_id,
* avic_physical_id.
*/
- WARN_ON(svm->vmcb01.ptr->control.int_ctl & AVIC_ENABLE_MASK);
+ WARN_ON(kvm_apicv_activated(svm->vcpu.kvm));
/* Copied from vmcb01. msrpm_base can be overwritten later. */
svm->vmcb->control.nested_ctl = svm->vmcb01.ptr->control.nested_ctl;
@@ -535,8 +542,8 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
vcpu->arch.l1_tsc_offset + svm->nested.ctl.tsc_offset;
svm->vmcb->control.int_ctl =
- (svm->nested.ctl.int_ctl & ~mask) |
- (svm->vmcb01.ptr->control.int_ctl & mask);
+ (svm->nested.ctl.int_ctl & int_ctl_vmcb12_bits) |
+ (svm->vmcb01.ptr->control.int_ctl & int_ctl_vmcb01_bits);
svm->vmcb->control.virt_ext = svm->nested.ctl.virt_ext;
svm->vmcb->control.int_vector = svm->nested.ctl.int_vector;
@@ -702,8 +709,8 @@ out:
}
/* Copy state save area fields which are handled by VMRUN */
-void svm_copy_vmrun_state(struct vmcb_save_area *from_save,
- struct vmcb_save_area *to_save)
+void svm_copy_vmrun_state(struct vmcb_save_area *to_save,
+ struct vmcb_save_area *from_save)
{
to_save->es = from_save->es;
to_save->cs = from_save->cs;
@@ -722,7 +729,7 @@ void svm_copy_vmrun_state(struct vmcb_save_area *from_save,
to_save->cpl = 0;
}
-void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
+void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
{
to_vmcb->save.fs = from_vmcb->save.fs;
to_vmcb->save.gs = from_vmcb->save.gs;
@@ -1385,7 +1392,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;
- svm_copy_vmrun_state(save, &svm->vmcb01.ptr->save);
+ svm_copy_vmrun_state(&svm->vmcb01.ptr->save, save);
nested_load_control_from_vmcb12(svm, ctl);
svm_switch_vmcb(svm, &svm->nested.vmcb02);
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 6710d9ee2e4b..7fbce342eec4 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -64,6 +64,7 @@ static DEFINE_MUTEX(sev_bitmap_lock);
unsigned int max_sev_asid;
static unsigned int min_sev_asid;
static unsigned long sev_me_mask;
+static unsigned int nr_asids;
static unsigned long *sev_asid_bitmap;
static unsigned long *sev_reclaim_asid_bitmap;
@@ -78,11 +79,11 @@ struct enc_region {
/* Called with the sev_bitmap_lock held, or on shutdown */
static int sev_flush_asids(int min_asid, int max_asid)
{
- int ret, pos, error = 0;
+ int ret, asid, error = 0;
/* Check if there are any ASIDs to reclaim before performing a flush */
- pos = find_next_bit(sev_reclaim_asid_bitmap, max_asid, min_asid);
- if (pos >= max_asid)
+ asid = find_next_bit(sev_reclaim_asid_bitmap, nr_asids, min_asid);
+ if (asid > max_asid)
return -EBUSY;
/*
@@ -115,15 +116,15 @@ static bool __sev_recycle_asids(int min_asid, int max_asid)
/* The flush process will flush all reclaimable SEV and SEV-ES ASIDs */
bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap,
- max_sev_asid);
- bitmap_zero(sev_reclaim_asid_bitmap, max_sev_asid);
+ nr_asids);
+ bitmap_zero(sev_reclaim_asid_bitmap, nr_asids);
return true;
}
static int sev_asid_new(struct kvm_sev_info *sev)
{
- int pos, min_asid, max_asid, ret;
+ int asid, min_asid, max_asid, ret;
bool retry = true;
enum misc_res_type type;
@@ -143,11 +144,11 @@ static int sev_asid_new(struct kvm_sev_info *sev)
* SEV-enabled guests must use asid from min_sev_asid to max_sev_asid.
* SEV-ES-enabled guest can use from 1 to min_sev_asid - 1.
*/
- min_asid = sev->es_active ? 0 : min_sev_asid - 1;
+ min_asid = sev->es_active ? 1 : min_sev_asid;
max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
again:
- pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_asid);
- if (pos >= max_asid) {
+ asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid);
+ if (asid > max_asid) {
if (retry && __sev_recycle_asids(min_asid, max_asid)) {
retry = false;
goto again;
@@ -157,11 +158,11 @@ again:
goto e_uncharge;
}
- __set_bit(pos, sev_asid_bitmap);
+ __set_bit(asid, sev_asid_bitmap);
mutex_unlock(&sev_bitmap_lock);
- return pos + 1;
+ return asid;
e_uncharge:
misc_cg_uncharge(type, sev->misc_cg, 1);
put_misc_cg(sev->misc_cg);
@@ -179,17 +180,16 @@ static int sev_get_asid(struct kvm *kvm)
static void sev_asid_free(struct kvm_sev_info *sev)
{
struct svm_cpu_data *sd;
- int cpu, pos;
+ int cpu;
enum misc_res_type type;
mutex_lock(&sev_bitmap_lock);
- pos = sev->asid - 1;
- __set_bit(pos, sev_reclaim_asid_bitmap);
+ __set_bit(sev->asid, sev_reclaim_asid_bitmap);
for_each_possible_cpu(cpu) {
sd = per_cpu(svm_data, cpu);
- sd->sev_vmcbs[pos] = NULL;
+ sd->sev_vmcbs[sev->asid] = NULL;
}
mutex_unlock(&sev_bitmap_lock);
@@ -1857,12 +1857,17 @@ void __init sev_hardware_setup(void)
min_sev_asid = edx;
sev_me_mask = 1UL << (ebx & 0x3f);
- /* Initialize SEV ASID bitmaps */
- sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
+ /*
+ * Initialize SEV ASID bitmaps. Allocate space for ASID 0 in the bitmap,
+ * even though it's never used, so that the bitmap is indexed by the
+ * actual ASID.
+ */
+ nr_asids = max_sev_asid + 1;
+ sev_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL);
if (!sev_asid_bitmap)
goto out;
- sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
+ sev_reclaim_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL);
if (!sev_reclaim_asid_bitmap) {
bitmap_free(sev_asid_bitmap);
sev_asid_bitmap = NULL;
@@ -1907,7 +1912,7 @@ void sev_hardware_teardown(void)
return;
/* No need to take sev_bitmap_lock, all VMs have been destroyed. */
- sev_flush_asids(0, max_sev_asid);
+ sev_flush_asids(1, max_sev_asid);
bitmap_free(sev_asid_bitmap);
bitmap_free(sev_reclaim_asid_bitmap);
@@ -1921,7 +1926,7 @@ int sev_cpu_init(struct svm_cpu_data *sd)
if (!sev_enabled)
return 0;
- sd->sev_vmcbs = kcalloc(max_sev_asid + 1, sizeof(void *), GFP_KERNEL);
+ sd->sev_vmcbs = kcalloc(nr_asids, sizeof(void *), GFP_KERNEL);
if (!sd->sev_vmcbs)
return -ENOMEM;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 664d20f0689c..69639f9624f5 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1406,8 +1406,6 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
goto error_free_vmsa_page;
}
- svm_vcpu_init_msrpm(vcpu, svm->msrpm);
-
svm->vmcb01.ptr = page_address(vmcb01_page);
svm->vmcb01.pa = __sme_set(page_to_pfn(vmcb01_page) << PAGE_SHIFT);
@@ -1419,6 +1417,8 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
svm_switch_vmcb(svm, &svm->vmcb01);
init_vmcb(vcpu);
+ svm_vcpu_init_msrpm(vcpu, svm->msrpm);
+
svm_init_osvw(vcpu);
vcpu->arch.microcode_version = 0x01000065;
@@ -1568,8 +1568,11 @@ static void svm_set_vintr(struct vcpu_svm *svm)
{
struct vmcb_control_area *control;
- /* The following fields are ignored when AVIC is enabled */
- WARN_ON(kvm_vcpu_apicv_active(&svm->vcpu));
+ /*
+ * The following fields are ignored when AVIC is enabled
+ */
+ WARN_ON(kvm_apicv_activated(svm->vcpu.kvm));
+
svm_set_intercept(svm, INTERCEPT_VINTR);
/*
@@ -1586,17 +1589,18 @@ static void svm_set_vintr(struct vcpu_svm *svm)
static void svm_clear_vintr(struct vcpu_svm *svm)
{
- const u32 mask = V_TPR_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK | V_INTR_MASKING_MASK;
svm_clr_intercept(svm, INTERCEPT_VINTR);
/* Drop int_ctl fields related to VINTR injection. */
- svm->vmcb->control.int_ctl &= mask;
+ svm->vmcb->control.int_ctl &= ~V_IRQ_INJECTION_BITS_MASK;
if (is_guest_mode(&svm->vcpu)) {
- svm->vmcb01.ptr->control.int_ctl &= mask;
+ svm->vmcb01.ptr->control.int_ctl &= ~V_IRQ_INJECTION_BITS_MASK;
WARN_ON((svm->vmcb->control.int_ctl & V_TPR_MASK) !=
(svm->nested.ctl.int_ctl & V_TPR_MASK));
- svm->vmcb->control.int_ctl |= svm->nested.ctl.int_ctl & ~mask;
+
+ svm->vmcb->control.int_ctl |= svm->nested.ctl.int_ctl &
+ V_IRQ_INJECTION_BITS_MASK;
}
vmcb_mark_dirty(svm->vmcb, VMCB_INTR);
@@ -2147,11 +2151,12 @@ static int vmload_vmsave_interception(struct kvm_vcpu *vcpu, bool vmload)
ret = kvm_skip_emulated_instruction(vcpu);
if (vmload) {
- nested_svm_vmloadsave(vmcb12, svm->vmcb);
+ svm_copy_vmloadsave_state(svm->vmcb, vmcb12);
svm->sysenter_eip_hi = 0;
svm->sysenter_esp_hi = 0;
- } else
- nested_svm_vmloadsave(svm->vmcb, vmcb12);
+ } else {
+ svm_copy_vmloadsave_state(vmcb12, svm->vmcb);
+ }
kvm_vcpu_unmap(vcpu, &map, true);
@@ -4344,8 +4349,8 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400);
- svm_copy_vmrun_state(&svm->vmcb01.ptr->save,
- map_save.hva + 0x400);
+ svm_copy_vmrun_state(map_save.hva + 0x400,
+ &svm->vmcb01.ptr->save);
kvm_vcpu_unmap(vcpu, &map_save, true);
}
@@ -4393,8 +4398,8 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
&map_save) == -EINVAL)
return 1;
- svm_copy_vmrun_state(map_save.hva + 0x400,
- &svm->vmcb01.ptr->save);
+ svm_copy_vmrun_state(&svm->vmcb01.ptr->save,
+ map_save.hva + 0x400);
kvm_vcpu_unmap(vcpu, &map_save, true);
}
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 7e2090752d8f..bd0fe94c2920 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -464,9 +464,9 @@ void svm_leave_nested(struct vcpu_svm *svm);
void svm_free_nested(struct vcpu_svm *svm);
int svm_allocate_nested(struct vcpu_svm *svm);
int nested_svm_vmrun(struct kvm_vcpu *vcpu);
-void svm_copy_vmrun_state(struct vmcb_save_area *from_save,
- struct vmcb_save_area *to_save);
-void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb);
+void svm_copy_vmrun_state(struct vmcb_save_area *to_save,
+ struct vmcb_save_area *from_save);
+void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb);
int nested_svm_vmexit(struct vcpu_svm *svm);
static inline int nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code)
diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h
index 9b9a55abc29f..c53b8bf8d013 100644
--- a/arch/x86/kvm/svm/svm_onhyperv.h
+++ b/arch/x86/kvm/svm/svm_onhyperv.h
@@ -89,7 +89,7 @@ static inline void svm_hv_vmcb_dirty_nested_enlightenments(
* as we mark it dirty unconditionally towards end of vcpu
* init phase.
*/
- if (vmcb && vmcb_is_clean(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS) &&
+ if (vmcb_is_clean(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS) &&
hve->hv_enlightenments_control.msr_bitmap)
vmcb_mark_dirty(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS);
}
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index b484141ea15b..03ebe368333e 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -92,6 +92,21 @@ TRACE_EVENT(kvm_hv_hypercall,
__entry->outgpa)
);
+TRACE_EVENT(kvm_hv_hypercall_done,
+ TP_PROTO(u64 result),
+ TP_ARGS(result),
+
+ TP_STRUCT__entry(
+ __field(__u64, result)
+ ),
+
+ TP_fast_assign(
+ __entry->result = result;
+ ),
+
+ TP_printk("result 0x%llx", __entry->result)
+);
+
/*
* Tracepoint for Xen hypercall.
*/
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 1a52134b0c42..b3f77d18eb5a 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -330,6 +330,31 @@ void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu)
vcpu_put(vcpu);
}
+#define EPTP_PA_MASK GENMASK_ULL(51, 12)
+
+static bool nested_ept_root_matches(hpa_t root_hpa, u64 root_eptp, u64 eptp)
+{
+ return VALID_PAGE(root_hpa) &&
+ ((root_eptp & EPTP_PA_MASK) == (eptp & EPTP_PA_MASK));
+}
+
+static void nested_ept_invalidate_addr(struct kvm_vcpu *vcpu, gpa_t eptp,
+ gpa_t addr)
+{
+ uint i;
+ struct kvm_mmu_root_info *cached_root;
+
+ WARN_ON_ONCE(!mmu_is_nested(vcpu));
+
+ for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
+ cached_root = &vcpu->arch.mmu->prev_roots[i];
+
+ if (nested_ept_root_matches(cached_root->hpa, cached_root->pgd,
+ eptp))
+ vcpu->arch.mmu->invlpg(vcpu, addr, cached_root->hpa);
+ }
+}
+
static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
struct x86_exception *fault)
{
@@ -342,10 +367,22 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
vm_exit_reason = EXIT_REASON_PML_FULL;
vmx->nested.pml_full = false;
exit_qualification &= INTR_INFO_UNBLOCK_NMI;
- } else if (fault->error_code & PFERR_RSVD_MASK)
- vm_exit_reason = EXIT_REASON_EPT_MISCONFIG;
- else
- vm_exit_reason = EXIT_REASON_EPT_VIOLATION;
+ } else {
+ if (fault->error_code & PFERR_RSVD_MASK)
+ vm_exit_reason = EXIT_REASON_EPT_MISCONFIG;
+ else
+ vm_exit_reason = EXIT_REASON_EPT_VIOLATION;
+
+ /*
+ * Although the caller (kvm_inject_emulated_page_fault) would
+ * have already synced the faulting address in the shadow EPT
+ * tables for the current EPTP12, we also need to sync it for
+ * any other cached EPTP02s based on the same EP4TA, since the
+ * TLB associates mappings to the EP4TA rather than the full EPTP.
+ */
+ nested_ept_invalidate_addr(vcpu, vmcs12->ept_pointer,
+ fault->address);
+ }
nested_vmx_vmexit(vcpu, vm_exit_reason, 0, exit_qualification);
vmcs12->guest_physical_address = fault->address;
@@ -5325,14 +5362,6 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
return nested_vmx_succeed(vcpu);
}
-#define EPTP_PA_MASK GENMASK_ULL(51, 12)
-
-static bool nested_ept_root_matches(hpa_t root_hpa, u64 root_eptp, u64 eptp)
-{
- return VALID_PAGE(root_hpa) &&
- ((root_eptp & EPTP_PA_MASK) == (eptp & EPTP_PA_MASK));
-}
-
/* Emulate the INVEPT instruction */
static int handle_invept(struct kvm_vcpu *vcpu)
{
@@ -5826,7 +5855,8 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu,
if (is_nmi(intr_info))
return true;
else if (is_page_fault(intr_info))
- return vcpu->arch.apf.host_apf_flags || !enable_ept;
+ return vcpu->arch.apf.host_apf_flags ||
+ vmx_need_pf_intercept(vcpu);
else if (is_debug(intr_info) &&
vcpu->guest_debug &
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index db88ed4f2121..17a1cb4b059d 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -522,7 +522,7 @@ static inline struct vmcs *alloc_vmcs(bool shadow)
static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx)
{
- return vmx->secondary_exec_control &
+ return secondary_exec_controls_get(vmx) &
SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a4fd10604f72..e5d5c5ed7dd4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3407,7 +3407,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
break;
case MSR_KVM_ASYNC_PF_ACK:
- if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
+ if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
return 1;
if (data & 0x1) {
vcpu->arch.apf.pageready_pending = false;
@@ -3746,7 +3746,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vcpu->arch.apf.msr_int_val;
break;
case MSR_KVM_ASYNC_PF_ACK:
- if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
+ if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
return 1;
msr_info->data = 0;
@@ -4358,8 +4358,17 @@ static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu)
static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu)
{
- return kvm_arch_interrupt_allowed(vcpu) &&
- kvm_cpu_accept_dm_intr(vcpu);
+ /*
+ * Do not cause an interrupt window exit if an exception
+ * is pending or an event needs reinjection; userspace
+ * might want to inject the interrupt manually using KVM_SET_REGS
+ * or KVM_SET_SREGS. For that to work, we must be at an
+ * instruction boundary and with no events half-injected.
+ */
+ return (kvm_arch_interrupt_allowed(vcpu) &&
+ kvm_cpu_accept_dm_intr(vcpu) &&
+ !kvm_event_needs_reinjection(vcpu) &&
+ !vcpu->arch.exception.pending);
}
static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index cd768dafca9e..933a2ebad471 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -376,12 +376,12 @@ static void enter_uniprocessor(void)
goto out;
}
- get_online_cpus();
+ cpus_read_lock();
cpumask_copy(downed_cpus, cpu_online_mask);
cpumask_clear_cpu(cpumask_first(cpu_online_mask), downed_cpus);
if (num_online_cpus() > 1)
pr_notice("Disabling non-boot CPUs...\n");
- put_online_cpus();
+ cpus_read_unlock();
for_each_cpu(cpu, downed_cpus) {
err = remove_cpu(cpu);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 4b951458c9fc..16d76f814e9b 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1219,6 +1219,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
}
break;
+ /* speculation barrier */
+ case BPF_ST | BPF_NOSPEC:
+ if (boot_cpu_has(X86_FEATURE_XMM2))
+ /* Emit 'lfence' */
+ EMIT3(0x0F, 0xAE, 0xE8);
+ break;
+
/* ST: *(u8*)(dst_reg + off) = imm */
case BPF_ST | BPF_MEM | BPF_B:
if (is_ereg(dst_reg))
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index 3da88ded6ee3..3bfda5f502cb 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -1886,6 +1886,12 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
i++;
break;
}
+ /* speculation barrier */
+ case BPF_ST | BPF_NOSPEC:
+ if (boot_cpu_has(X86_FEATURE_XMM2))
+ /* Emit 'lfence' */
+ EMIT3(0x0F, 0xAE, 0xE8);
+ break;
/* ST: *(u8*)(dst_reg + off) = imm */
case BPF_ST | BPF_MEM | BPF_H:
case BPF_ST | BPF_MEM | BPF_B:
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 3a070e7cdb8b..6665f8802098 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -58,19 +58,20 @@ static void msr_restore_context(struct saved_context *ctxt)
}
/**
- * __save_processor_state - save CPU registers before creating a
- * hibernation image and before restoring the memory state from it
- * @ctxt - structure to store the registers contents in
+ * __save_processor_state() - Save CPU registers before creating a
+ * hibernation image and before restoring
+ * the memory state from it
+ * @ctxt: Structure to store the registers contents in.
*
- * NOTE: If there is a CPU register the modification of which by the
- * boot kernel (ie. the kernel used for loading the hibernation image)
- * might affect the operations of the restored target kernel (ie. the one
- * saved in the hibernation image), then its contents must be saved by this
- * function. In other words, if kernel A is hibernated and different
- * kernel B is used for loading the hibernation image into memory, the
- * kernel A's __save_processor_state() function must save all registers
- * needed by kernel A, so that it can operate correctly after the resume
- * regardless of what kernel B does in the meantime.
+ * NOTE: If there is a CPU register the modification of which by the
+ * boot kernel (ie. the kernel used for loading the hibernation image)
+ * might affect the operations of the restored target kernel (ie. the one
+ * saved in the hibernation image), then its contents must be saved by this
+ * function. In other words, if kernel A is hibernated and different
+ * kernel B is used for loading the hibernation image into memory, the
+ * kernel A's __save_processor_state() function must save all registers
+ * needed by kernel A, so that it can operate correctly after the resume
+ * regardless of what kernel B does in the meantime.
*/
static void __save_processor_state(struct saved_context *ctxt)
{
@@ -181,9 +182,9 @@ static void fix_processor_context(void)
}
/**
- * __restore_processor_state - restore the contents of CPU registers saved
- * by __save_processor_state()
- * @ctxt - structure to load the registers contents from
+ * __restore_processor_state() - Restore the contents of CPU registers saved
+ * by __save_processor_state()
+ * @ctxt: Structure to load the registers contents from.
*
* The asm code that gets us here will have restored a usable GDT, although
* it will be pointing to the wrong alias.
diff --git a/arch/x86/tools/chkobjdump.awk b/arch/x86/tools/chkobjdump.awk
index fd1ab80be0de..a4cf678cf5c8 100644
--- a/arch/x86/tools/chkobjdump.awk
+++ b/arch/x86/tools/chkobjdump.awk
@@ -10,6 +10,7 @@ BEGIN {
/^GNU objdump/ {
verstr = ""
+ gsub(/\(.*\)/, "");
for (i = 3; i <= NF; i++)
if (match($(i), "^[0-9]")) {
verstr = $(i);
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 04c5a44b9682..27c82207d387 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -26,6 +26,9 @@ static struct relocs relocs32;
#if ELF_BITS == 64
static struct relocs relocs32neg;
static struct relocs relocs64;
+#define FMT PRIu64
+#else
+#define FMT PRIu32
#endif
struct section {
@@ -57,12 +60,12 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = {
[S_REL] =
"^(__init_(begin|end)|"
"__x86_cpu_dev_(start|end)|"
- "(__parainstructions|__alt_instructions)(|_end)|"
- "(__iommu_table|__apicdrivers|__smp_locks)(|_end)|"
+ "(__parainstructions|__alt_instructions)(_end)?|"
+ "(__iommu_table|__apicdrivers|__smp_locks)(_end)?|"
"__(start|end)_pci_.*|"
"__(start|end)_builtin_fw|"
- "__(start|stop)___ksymtab(|_gpl)|"
- "__(start|stop)___kcrctab(|_gpl)|"
+ "__(start|stop)___ksymtab(_gpl)?|"
+ "__(start|stop)___kcrctab(_gpl)?|"
"__(start|stop)___param|"
"__(start|stop)___modver|"
"__(start|stop)___bug_table|"
@@ -389,7 +392,7 @@ static void read_ehdr(FILE *fp)
Elf_Shdr shdr;
if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0)
- die("Seek to %d failed: %s\n", ehdr.e_shoff, strerror(errno));
+ die("Seek to %" FMT " failed: %s\n", ehdr.e_shoff, strerror(errno));
if (fread(&shdr, sizeof(shdr), 1, fp) != 1)
die("Cannot read initial ELF section header: %s\n", strerror(errno));
@@ -412,17 +415,17 @@ static void read_shdrs(FILE *fp)
secs = calloc(shnum, sizeof(struct section));
if (!secs) {
- die("Unable to allocate %d section headers\n",
+ die("Unable to allocate %ld section headers\n",
shnum);
}
if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0) {
- die("Seek to %d failed: %s\n",
- ehdr.e_shoff, strerror(errno));
+ die("Seek to %" FMT " failed: %s\n",
+ ehdr.e_shoff, strerror(errno));
}
for (i = 0; i < shnum; i++) {
struct section *sec = &secs[i];
if (fread(&shdr, sizeof(shdr), 1, fp) != 1)
- die("Cannot read ELF section headers %d/%d: %s\n",
+ die("Cannot read ELF section headers %d/%ld: %s\n",
i, shnum, strerror(errno));
sec->shdr.sh_name = elf_word_to_cpu(shdr.sh_name);
sec->shdr.sh_type = elf_word_to_cpu(shdr.sh_type);
@@ -450,12 +453,12 @@ static void read_strtabs(FILE *fp)
}
sec->strtab = malloc(sec->shdr.sh_size);
if (!sec->strtab) {
- die("malloc of %d bytes for strtab failed\n",
- sec->shdr.sh_size);
+ die("malloc of %" FMT " bytes for strtab failed\n",
+ sec->shdr.sh_size);
}
if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) {
- die("Seek to %d failed: %s\n",
- sec->shdr.sh_offset, strerror(errno));
+ die("Seek to %" FMT " failed: %s\n",
+ sec->shdr.sh_offset, strerror(errno));
}
if (fread(sec->strtab, 1, sec->shdr.sh_size, fp)
!= sec->shdr.sh_size) {
@@ -475,12 +478,12 @@ static void read_symtabs(FILE *fp)
}
sec->symtab = malloc(sec->shdr.sh_size);
if (!sec->symtab) {
- die("malloc of %d bytes for symtab failed\n",
- sec->shdr.sh_size);
+ die("malloc of %" FMT " bytes for symtab failed\n",
+ sec->shdr.sh_size);
}
if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) {
- die("Seek to %d failed: %s\n",
- sec->shdr.sh_offset, strerror(errno));
+ die("Seek to %" FMT " failed: %s\n",
+ sec->shdr.sh_offset, strerror(errno));
}
if (fread(sec->symtab, 1, sec->shdr.sh_size, fp)
!= sec->shdr.sh_size) {
@@ -508,12 +511,12 @@ static void read_relocs(FILE *fp)
}
sec->reltab = malloc(sec->shdr.sh_size);
if (!sec->reltab) {
- die("malloc of %d bytes for relocs failed\n",
- sec->shdr.sh_size);
+ die("malloc of %" FMT " bytes for relocs failed\n",
+ sec->shdr.sh_size);
}
if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) {
- die("Seek to %d failed: %s\n",
- sec->shdr.sh_offset, strerror(errno));
+ die("Seek to %" FMT " failed: %s\n",
+ sec->shdr.sh_offset, strerror(errno));
}
if (fread(sec->reltab, 1, sec->shdr.sh_size, fp)
!= sec->shdr.sh_size) {
diff --git a/arch/x86/tools/relocs.h b/arch/x86/tools/relocs.h
index 43c83c0fd22c..4c49c82446eb 100644
--- a/arch/x86/tools/relocs.h
+++ b/arch/x86/tools/relocs.h
@@ -17,6 +17,7 @@
#include <regex.h>
#include <tools/le_byteshift.h>
+__attribute__((__format__(printf, 1, 2)))
void die(char *fmt, ...) __attribute__((noreturn));
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))