aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rw-r--r--Documentation/dontdiff1
-rw-r--r--Documentation/driver-api/ntb.rst27
-rw-r--r--Documentation/filesystems/porting2
-rw-r--r--Documentation/kbuild/kbuild.rst5
-rw-r--r--Documentation/kbuild/makefiles.rst12
-rw-r--r--Documentation/virtual/kvm/api.txt15
-rw-r--r--MAINTAINERS8
-rw-r--r--Makefile61
-rw-r--r--arch/Kconfig3
-rw-r--r--arch/arm/vdso/Makefile3
-rw-r--r--arch/arm64/kernel/vdso32/Makefile4
-rw-r--r--arch/hexagon/include/asm/pgalloc.h34
-rw-r--r--arch/riscv/Makefile2
-rw-r--r--arch/s390/Kconfig1
-rw-r--r--arch/s390/kvm/interrupt.c23
-rw-r--r--arch/s390/mm/init.c7
-rw-r--r--arch/sparc/vdso/Makefile3
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/boot/compressed/eboot.c10
-rw-r--r--arch/x86/boot/compressed/misc.c1
-rw-r--r--arch/x86/boot/compressed/misc.h1
-rw-r--r--arch/x86/boot/compressed/pgtable_64.c1
-rw-r--r--arch/x86/entry/calling.h6
-rw-r--r--arch/x86/entry/entry_32.S61
-rw-r--r--arch/x86/entry/entry_64.S155
-rw-r--r--arch/x86/entry/thunk_64.S5
-rw-r--r--arch/x86/entry/vdso/Makefile5
-rw-r--r--arch/x86/hyperv/hv_init.c13
-rw-r--r--arch/x86/include/asm/apic.h2
-rw-r--r--arch/x86/include/asm/kvm_host.h34
-rw-r--r--arch/x86/include/asm/kvm_para.h2
-rw-r--r--arch/x86/include/asm/paravirt.h23
-rw-r--r--arch/x86/include/asm/paravirt_types.h2
-rw-r--r--arch/x86/include/asm/traps.h4
-rw-r--r--arch/x86/include/uapi/asm/kvm.h9
-rw-r--r--arch/x86/kernel/apic/apic.c2
-rw-r--r--arch/x86/kernel/asm-offsets.c1
-rw-r--r--arch/x86/kernel/e820.c4
-rw-r--r--arch/x86/kernel/head_64.S8
-rw-r--r--arch/x86/kernel/kvm.c9
-rw-r--r--arch/x86/kernel/mpparse.c10
-rw-r--r--arch/x86/kernel/paravirt.c2
-rw-r--r--arch/x86/kernel/process_64.c12
-rw-r--r--arch/x86/kernel/ptrace.c14
-rw-r--r--arch/x86/kernel/traps.c6
-rw-r--r--arch/x86/kvm/cpuid.c12
-rw-r--r--arch/x86/kvm/emulate.c44
-rw-r--r--arch/x86/kvm/hyperv.c20
-rw-r--r--arch/x86/kvm/ioapic.c15
-rw-r--r--arch/x86/kvm/lapic.c202
-rw-r--r--arch/x86/kvm/lapic.h1
-rw-r--r--arch/x86/kvm/mmu.c6
-rw-r--r--arch/x86/kvm/pmu.c27
-rw-r--r--arch/x86/kvm/svm.c42
-rw-r--r--arch/x86/kvm/vmx/nested.c13
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c11
-rw-r--r--arch/x86/kvm/vmx/vmenter.S6
-rw-r--r--arch/x86/kvm/vmx/vmx.c6
-rw-r--r--arch/x86/kvm/x86.c20
-rw-r--r--arch/x86/kvm/x86.h2
-rw-r--r--arch/x86/lib/copy_user_64.S2
-rw-r--r--arch/x86/lib/getuser.S20
-rw-r--r--arch/x86/lib/putuser.S29
-rw-r--r--arch/x86/lib/usercopy_64.c2
-rw-r--r--arch/x86/math-emu/fpu_emu.h2
-rw-r--r--arch/x86/math-emu/reg_constant.c2
-rw-r--r--arch/x86/mm/fault.c30
-rw-r--r--arch/x86/mm/mem_encrypt.c32
-rw-r--r--arch/x86/xen/enlighten_pv.c3
-rw-r--r--arch/x86/xen/mmu_pv.c12
-rw-r--r--arch/x86/xen/xen-asm.S16
-rw-r--r--arch/x86/xen/xen-ops.h3
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.c35
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c18
-rw-r--r--drivers/input/joystick/iforce/iforce-ff.c3
-rw-r--r--drivers/input/joystick/iforce/iforce-main.c3
-rw-r--r--drivers/input/joystick/iforce/iforce-packets.c3
-rw-r--r--drivers/input/joystick/iforce/iforce-serio.c3
-rw-r--r--drivers/input/joystick/iforce/iforce-usb.c3
-rw-r--r--drivers/input/joystick/iforce/iforce.h3
-rw-r--r--drivers/input/keyboard/Kconfig16
-rw-r--r--drivers/input/keyboard/Makefile1
-rw-r--r--drivers/input/keyboard/adp5589-keys.c1
-rw-r--r--drivers/input/keyboard/applespi.c1977
-rw-r--r--drivers/input/keyboard/applespi.h29
-rw-r--r--drivers/input/keyboard/applespi_trace.h93
-rw-r--r--drivers/input/keyboard/mtk-pmic-keys.c9
-rw-r--r--drivers/input/keyboard/sun4i-lradc-keys.c3
-rw-r--r--drivers/input/mouse/alps.c32
-rw-r--r--drivers/input/mouse/synaptics.c3
-rw-r--r--drivers/input/mouse/trackpoint.h3
-rw-r--r--drivers/input/serio/hyperv-keyboard.c4
-rw-r--r--drivers/input/tablet/gtco.c20
-rw-r--r--drivers/input/touchscreen/auo-pixcir-ts.c3
-rw-r--r--drivers/iommu/amd_iommu.c2
-rw-r--r--drivers/memory/.gitignore1
-rw-r--r--drivers/memory/Makefile5
-rw-r--r--drivers/memory/ti-emif-sram-pm.S2
-rw-r--r--drivers/ntb/Kconfig11
-rw-r--r--drivers/ntb/Makefile3
-rw-r--r--drivers/ntb/core.c (renamed from drivers/ntb/ntb.c)0
-rw-r--r--drivers/ntb/hw/amd/ntb_hw_amd.c10
-rw-r--r--drivers/ntb/hw/intel/ntb_hw_gen3.c6
-rw-r--r--drivers/ntb/hw/mscc/ntb_hw_switchtec.c82
-rw-r--r--drivers/ntb/msi.c415
-rw-r--r--drivers/ntb/ntb_transport.c170
-rw-r--r--drivers/ntb/test/Kconfig9
-rw-r--r--drivers/ntb/test/Makefile1
-rw-r--r--drivers/ntb/test/ntb_msi_test.c433
-rw-r--r--drivers/ntb/test/ntb_perf.c14
-rw-r--r--drivers/pci/msi.c54
-rw-r--r--drivers/pci/switch/switchtec.c12
-rw-r--r--drivers/s390/scsi/zfcp_erp.c7
-rw-r--r--drivers/s390/scsi/zfcp_fsf.c55
-rw-r--r--drivers/scsi/Makefile2
-rw-r--r--drivers/scsi/hosts.c3
-rw-r--r--drivers/scsi/libfc/fc_exch.c2
-rw-r--r--drivers/scsi/libsas/sas_scsi_host.c1
-rw-r--r--drivers/scsi/lpfc/lpfc_debugfs.h2
-rw-r--r--drivers/scsi/megaraid/megaraid_sas.h4
-rw-r--r--drivers/scsi/megaraid/megaraid_sas_base.c31
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_scsih.c1
-rw-r--r--drivers/scsi/pm8001/pm8001_sas.c6
-rw-r--r--drivers/scsi/pm8001/pm80xx_hwi.c2
-rw-r--r--drivers/scsi/pm8001/pm80xx_hwi.h2
-rw-r--r--drivers/scsi/scsi_devinfo.c2
-rw-r--r--drivers/scsi/scsi_lib.c13
-rw-r--r--drivers/scsi/sd_zbc.c2
-rw-r--r--drivers/scsi/storvsc_drv.c5
-rw-r--r--drivers/scsi/ufs/ufshcd.c3
-rw-r--r--fs/ceph/dir.c2
-rw-r--r--fs/cifs/cifsfs.c11
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/inode.c16
-rw-r--r--fs/cifs/smb2file.c18
-rw-r--r--fs/cifs/smb2ops.c53
-rw-r--r--fs/cifs/smb2pdu.c46
-rw-r--r--fs/cifs/smb2pdu.h4
-rw-r--r--fs/cifs/smb2proto.h7
-rw-r--r--fs/dcache.c100
-rw-r--r--fs/fs_pin.c10
-rw-r--r--fs/internal.h2
-rw-r--r--fs/mount.h8
-rw-r--r--fs/namespace.c159
-rw-r--r--fs/nfs/super.c6
-rw-r--r--include/Kbuild14
-rw-r--r--include/linux/compiler-gcc.h2
-rw-r--r--include/linux/compiler.h5
-rw-r--r--include/linux/compiler_types.h4
-rw-r--r--include/linux/dma-direct.h9
-rw-r--r--include/linux/dma-mapping.h14
-rw-r--r--include/linux/fs_pin.h1
-rw-r--r--include/linux/kvm_host.h1
-rw-r--r--include/linux/msi.h8
-rw-r--r--include/linux/ntb.h200
-rw-r--r--include/linux/pci.h9
-rw-r--r--include/linux/sched/isolation.h6
-rw-r--r--include/scsi/scsi_host.h3
-rw-r--r--kernel/Kconfig.preempt25
-rw-r--r--kernel/bpf/core.c5
-rw-r--r--kernel/dma/Kconfig3
-rw-r--r--kernel/dma/direct.c44
-rw-r--r--kernel/sched/isolation.c6
-rw-r--r--kernel/smp.c16
-rw-r--r--kernel/stacktrace.c5
-rw-r--r--lib/Kconfig.debug12
-rw-r--r--scripts/Kbuild.include5
-rw-r--r--scripts/Makefile.build42
-rw-r--r--scripts/Makefile.lib2
-rw-r--r--scripts/Makefile.modbuiltin2
-rw-r--r--scripts/Makefile.modinst5
-rw-r--r--scripts/Makefile.modpost19
-rw-r--r--scripts/Makefile.modsign3
-rwxr-xr-xscripts/adjust_autoksyms.sh14
-rw-r--r--scripts/coccinelle/api/devm_platform_ioremap_resource.cocci60
-rwxr-xr-xscripts/export_report.pl11
-rw-r--r--scripts/kconfig/Makefile2
-rw-r--r--scripts/kconfig/confdata.c7
-rw-r--r--scripts/kconfig/expr.h1
-rw-r--r--scripts/mod/sumversion.c23
-rwxr-xr-xscripts/modules-check.sh2
-rwxr-xr-xscripts/package/builddeb5
-rwxr-xr-xscripts/package/mkdebian1
-rwxr-xr-xscripts/package/mkspec2
-rw-r--r--tools/objtool/arch.h36
-rw-r--r--tools/objtool/arch/x86/decode.c2
-rw-r--r--tools/objtool/check.c333
-rw-r--r--tools/objtool/check.h3
-rw-r--r--tools/objtool/elf.c8
-rw-r--r--tools/objtool/elf.h5
-rw-r--r--tools/perf/builtin-script.c8
-rw-r--r--tools/perf/builtin-trace.c10
-rw-r--r--tools/perf/builtin-version.c1
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_m8561/basic.json58
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_m8561/crypto.json114
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_m8561/crypto6.json30
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_m8561/extended.json373
-rw-r--r--tools/perf/pmu-events/arch/s390/mapfile.csv1
-rw-r--r--tools/perf/scripts/python/export-to-postgresql.py68
-rw-r--r--tools/perf/scripts/python/export-to-sqlite.py54
-rwxr-xr-xtools/perf/scripts/python/exported-sql-viewer.py34
-rw-r--r--tools/perf/tests/builtin-test.c6
-rw-r--r--tools/perf/util/Build1
-rw-r--r--tools/perf/util/cs-etm.c12
-rw-r--r--tools/perf/util/db-export.c291
-rw-r--r--tools/perf/util/db-export.h19
-rw-r--r--tools/perf/util/rlimit.c29
-rw-r--r--tools/perf/util/rlimit.h6
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c53
-rw-r--r--tools/perf/util/trace-event.h3
-rw-r--r--tools/power/cpupower/debug/kernel/Makefile4
-rw-r--r--tools/testing/selftests/kvm/Makefile14
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h8
-rw-r--r--tools/testing/selftests/kvm/include/s390x/processor.h22
-rw-r--r--tools/testing/selftests/kvm/kvm_create_max_vcpus.c (renamed from tools/testing/selftests/kvm/x86_64/kvm_create_max_vcpus.c)2
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/processor.c2
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c23
-rw-r--r--tools/testing/selftests/kvm/lib/s390x/processor.c278
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/processor.c2
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/vmx.c2
-rw-r--r--tools/testing/selftests/kvm/s390x/sync_regs_test.c151
-rwxr-xr-xtools/testing/selftests/ntb/ntb_test.sh54
-rw-r--r--tools/testing/selftests/x86/fsgsbase.c22
-rw-r--r--usr/include/Makefile8
-rw-r--r--virt/kvm/kvm_main.c12
226 files changed, 6658 insertions, 1379 deletions
diff --git a/.gitignore b/.gitignore
index 7587ef56b92d..8f5422cba6e2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -30,6 +30,7 @@
*.lz4
*.lzma
*.lzo
+*.mod
*.mod.c
*.o
*.o.*
diff --git a/Documentation/dontdiff b/Documentation/dontdiff
index 5eba889ea84d..9f4392876099 100644
--- a/Documentation/dontdiff
+++ b/Documentation/dontdiff
@@ -30,6 +30,7 @@
*.lzo
*.mo
*.moc
+*.mod
*.mod.c
*.o
*.o.*
diff --git a/Documentation/driver-api/ntb.rst b/Documentation/driver-api/ntb.rst
index 074a423c853c..87d1372da879 100644
--- a/Documentation/driver-api/ntb.rst
+++ b/Documentation/driver-api/ntb.rst
@@ -200,6 +200,33 @@ Debugfs Files:
This file is used to read and write peer scratchpads. See
*spad* for details.
+NTB MSI Test Client (ntb\_msi\_test)
+------------------------------------
+
+The MSI test client serves to test and debug the MSI library which
+allows for passing MSI interrupts across NTB memory windows. The
+test client is interacted with through the debugfs filesystem:
+
+* *debugfs*/ntb\_tool/*hw*/
+ A directory in debugfs will be created for each
+ NTB device probed by the tool. This directory is shortened to *hw*
+ below.
+* *hw*/port
+ This file describes the local port number
+* *hw*/irq*_occurrences
+ One occurrences file exists for each interrupt and, when read,
+ returns the number of times the interrupt has been triggered.
+* *hw*/peer*/port
+ This file describes the port number for each peer
+* *hw*/peer*/count
+ This file describes the number of interrupts that can be
+ triggered on each peer
+* *hw*/peer*/trigger
+ Writing an interrupt number (any number less than the value
+ specified in count) will trigger the interrupt on the
+ specified peer. That peer's interrupt's occurrence file
+ should be incremented.
+
NTB Hardware Drivers
====================
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 209672010fb4..6b7a41cfcaed 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -436,7 +436,7 @@ for the inode. If d_make_root(inode) is passed a NULL inode it returns NULL
and also requires no further error handling. Typical usage is:
inode = foofs_new_inode(....);
- s->s_root = d_make_inode(inode);
+ s->s_root = d_make_root(inode);
if (!s->s_root)
/* Nothing needed for the inode cleanup */
return -ENOMEM;
diff --git a/Documentation/kbuild/kbuild.rst b/Documentation/kbuild/kbuild.rst
index ce9b99c004ae..61b2181ed3ea 100644
--- a/Documentation/kbuild/kbuild.rst
+++ b/Documentation/kbuild/kbuild.rst
@@ -38,12 +38,11 @@ Additional options to the assembler (for built-in and modules).
AFLAGS_MODULE
-------------
-Additional module specific options to use for $(AS).
+Additional assembler options for modules.
AFLAGS_KERNEL
-------------
-Additional options for $(AS) when used for assembler
-code for code that is compiled as built-in.
+Additional assembler options for built-in.
KCFLAGS
-------
diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst
index f31158457753..f4f0f7ffde2b 100644
--- a/Documentation/kbuild/makefiles.rst
+++ b/Documentation/kbuild/makefiles.rst
@@ -328,7 +328,7 @@ more details, with real examples.
variable $(KBUILD_CFLAGS) and uses it for compilation flags for the
entire tree.
- asflags-y specifies options for assembling with $(AS).
+ asflags-y specifies assembler options.
Example::
@@ -490,7 +490,7 @@ more details, with real examples.
as-instr checks if the assembler reports a specific instruction
and then outputs either option1 or option2
C escapes are supported in the test instruction
- Note: as-instr-option uses KBUILD_AFLAGS for $(AS) options
+ Note: as-instr-option uses KBUILD_AFLAGS for assembler options
cc-option
cc-option is used to check if $(CC) supports a given option, and if
@@ -906,7 +906,7 @@ When kbuild executes, the following steps are followed (roughly):
vmlinux. The usage of $(call if_changed,xxx) will be described later.
KBUILD_AFLAGS
- $(AS) assembler flags
+ Assembler flags
Default value - see top level Makefile
Append or modify as required per architecture.
@@ -949,16 +949,16 @@ When kbuild executes, the following steps are followed (roughly):
to 'y' when selected.
KBUILD_AFLAGS_KERNEL
- $(AS) options specific for built-in
+ Assembler options specific for built-in
$(KBUILD_AFLAGS_KERNEL) contains extra C compiler flags used to compile
resident kernel code.
KBUILD_AFLAGS_MODULE
- Options for $(AS) when building modules
+ Assembler options specific for modules
$(KBUILD_AFLAGS_MODULE) is used to add arch-specific options that
- are used for $(AS).
+ are used for assembler.
From commandline AFLAGS_MODULE shall be used (see kbuild.txt).
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 2cd6250b2896..e54a3f51ddc5 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -4090,17 +4090,22 @@ Parameters: struct kvm_pmu_event_filter (in)
Returns: 0 on success, -1 on error
struct kvm_pmu_event_filter {
- __u32 action;
- __u32 nevents;
- __u64 events[0];
+ __u32 action;
+ __u32 nevents;
+ __u32 fixed_counter_bitmap;
+ __u32 flags;
+ __u32 pad[4];
+ __u64 events[0];
};
This ioctl restricts the set of PMU events that the guest can program.
The argument holds a list of events which will be allowed or denied.
The eventsel+umask of each event the guest attempts to program is compared
against the events field to determine whether the guest should have access.
-This only affects general purpose counters; fixed purpose counters can
-be disabled by changing the perfmon CPUID leaf.
+The events field only controls general purpose counters; fixed purpose
+counters are controlled by the fixed_counter_bitmap.
+
+No flags are defined yet, the field must be zero.
Valid values for 'action':
#define KVM_PMU_EVENT_ALLOW 0
diff --git a/MAINTAINERS b/MAINTAINERS
index bd3fe4fe13c4..783569e3c4b4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8878,6 +8878,8 @@ F: arch/s390/include/asm/gmap.h
F: arch/s390/include/asm/kvm*
F: arch/s390/kvm/
F: arch/s390/mm/gmap.c
+F: tools/testing/selftests/kvm/s390x/
+F: tools/testing/selftests/kvm/*/s390x/
KERNEL VIRTUAL MACHINE FOR X86 (KVM/x86)
M: Paolo Bonzini <pbonzini@redhat.com>
@@ -12131,7 +12133,8 @@ F: Documentation/driver-api/parport*.rst
PARAVIRT_OPS INTERFACE
M: Juergen Gross <jgross@suse.com>
-M: Alok Kataria <akataria@vmware.com>
+M: Thomas Hellstrom <thellstrom@vmware.com>
+M: "VMware, Inc." <pv-drivers@vmware.com>
L: virtualization@lists.linux-foundation.org
S: Supported
F: Documentation/virtual/paravirt_ops.txt
@@ -17177,7 +17180,8 @@ S: Maintained
F: drivers/misc/vmw_balloon.c
VMWARE HYPERVISOR INTERFACE
-M: Alok Kataria <akataria@vmware.com>
+M: Thomas Hellstrom <thellstrom@vmware.com>
+M: "VMware, Inc." <pv-drivers@vmware.com>
L: virtualization@lists.linux-foundation.org
S: Supported
F: arch/x86/kernel/cpu/vmware.c
diff --git a/Makefile b/Makefile
index 2c5d00ba537e..fd6f7949bc1a 100644
--- a/Makefile
+++ b/Makefile
@@ -486,11 +486,6 @@ export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE
export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL
export KBUILD_ARFLAGS
-# When compiling out-of-tree modules, put MODVERDIR in the module
-# tree rather than in the kernel tree. The kernel tree might
-# even be read-only.
-export MODVERDIR := $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/).tmp_versions
-
# Files to ignore in find ... statements
export RCS_FIND_IGNORE := \( -name SCCS -o -name BitKeeper -o -name .svn -o \
@@ -887,6 +882,12 @@ KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init)
# change __FILE__ to the relative path from the srctree
KBUILD_CFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
+# ensure -fcf-protection is disabled when using retpoline as it is
+# incompatible with -mindirect-branch=thunk-extern
+ifdef CONFIG_RETPOLINE
+KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none)
+endif
+
# use the deterministic mode of AR if available
KBUILD_ARFLAGS := $(call ar-option,D)
@@ -900,10 +901,8 @@ KBUILD_CPPFLAGS += $(ARCH_CPPFLAGS) $(KCPPFLAGS)
KBUILD_AFLAGS += $(ARCH_AFLAGS) $(KAFLAGS)
KBUILD_CFLAGS += $(ARCH_CFLAGS) $(KCFLAGS)
-# Use --build-id when available.
-LDFLAGS_BUILD_ID := $(call ld-option, --build-id)
-KBUILD_LDFLAGS_MODULE += $(LDFLAGS_BUILD_ID)
-LDFLAGS_vmlinux += $(LDFLAGS_BUILD_ID)
+KBUILD_LDFLAGS_MODULE += --build-id
+LDFLAGS_vmlinux += --build-id
ifeq ($(CONFIG_STRIP_ASM_SYMS),y)
LDFLAGS_vmlinux += $(call ld-option, -X,)
@@ -1031,8 +1030,8 @@ vmlinux-deps := $(KBUILD_LDS) $(KBUILD_VMLINUX_OBJS) $(KBUILD_VMLINUX_LIBS)
# Recurse until adjust_autoksyms.sh is satisfied
PHONY += autoksyms_recursive
-autoksyms_recursive: $(vmlinux-deps)
ifdef CONFIG_TRIM_UNUSED_KSYMS
+autoksyms_recursive: $(vmlinux-deps) modules.order
$(Q)$(CONFIG_SHELL) $(srctree)/scripts/adjust_autoksyms.sh \
"$(MAKE) -f $(srctree)/Makefile vmlinux"
endif
@@ -1074,7 +1073,7 @@ $(sort $(vmlinux-deps)): $(vmlinux-dirs) ;
PHONY += $(vmlinux-dirs)
$(vmlinux-dirs): prepare
- $(Q)$(MAKE) $(build)=$@ need-builtin=1
+ $(Q)$(MAKE) $(build)=$@ need-builtin=1 need-modorder=1
filechk_kernel.release = \
echo "$(KERNELVERSION)$$($(CONFIG_SHELL) $(srctree)/scripts/setlocalversion $(srctree))"
@@ -1096,7 +1095,7 @@ scripts: scripts_basic scripts_dtc
# archprepare is used in arch Makefiles and when processed asm symlink,
# version.h and scripts_basic is processed / created.
-PHONY += prepare archprepare prepare1 prepare3
+PHONY += prepare archprepare prepare3
# prepare3 is used to check if we are building in a separate output directory,
# and if so do:
@@ -1113,11 +1112,8 @@ ifdef building_out_of_srctree
fi;
endif
-prepare1: prepare3 outputmakefile asm-generic $(version_h) $(autoksyms_h) \
- include/generated/utsrelease.h
- $(cmd_crmodverdir)
-
-archprepare: archheaders archscripts prepare1 scripts
+archprepare: archheaders archscripts scripts prepare3 outputmakefile \
+ asm-generic $(version_h) $(autoksyms_h) include/generated/utsrelease.h
prepare0: archprepare
$(Q)$(MAKE) $(build)=scripts/mod
@@ -1331,8 +1327,8 @@ _modinst_:
rm -f $(MODLIB)/build ; \
ln -s $(CURDIR) $(MODLIB)/build ; \
fi
- @cp -f $(objtree)/modules.order $(MODLIB)/
- @cp -f $(objtree)/modules.builtin $(MODLIB)/
+ @sed 's:^:kernel/:' modules.order > $(MODLIB)/modules.order
+ @sed 's:^:kernel/:' modules.builtin > $(MODLIB)/modules.builtin
@cp -f $(objtree)/modules.builtin.modinfo $(MODLIB)/
$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modinst
@@ -1373,18 +1369,22 @@ endif # CONFIG_MODULES
# make distclean Remove editor backup files, patch leftover files and the like
# Directories & files removed with 'make clean'
-CLEAN_DIRS += $(MODVERDIR) include/ksym
+CLEAN_DIRS += include/ksym
CLEAN_FILES += modules.builtin.modinfo
# Directories & files removed with 'make mrproper'
MRPROPER_DIRS += include/config include/generated \
arch/$(SRCARCH)/include/generated .tmp_objdiff
MRPROPER_FILES += .config .config.old .version \
- Module.symvers tags TAGS cscope* GPATH GTAGS GRTAGS GSYMS \
+ Module.symvers \
signing_key.pem signing_key.priv signing_key.x509 \
x509.genkey extra_certificates signing_key.x509.keyid \
signing_key.x509.signer vmlinux-gdb.py
+# Directories & files removed with 'make distclean'
+DISTCLEAN_DIRS +=
+DISTCLEAN_FILES += tags TAGS cscope* GPATH GTAGS GRTAGS GSYMS
+
# clean - Delete most, but leave enough to build external modules
#
clean: rm-dirs := $(CLEAN_DIRS)
@@ -1417,9 +1417,14 @@ mrproper: clean $(mrproper-dirs)
# distclean
#
+distclean: rm-dirs := $(wildcard $(DISTCLEAN_DIRS))
+distclean: rm-files := $(wildcard $(DISTCLEAN_FILES))
+
PHONY += distclean
distclean: mrproper
+ $(call cmd,rmdirs)
+ $(call cmd,rmfiles)
@find $(srctree) $(RCS_FIND_IGNORE) \
\( -name '*.orig' -o -name '*.rej' -o -name '*~' \
-o -name '*.bak' -o -name '#*#' -o -name '*%' \
@@ -1609,7 +1614,7 @@ $(objtree)/Module.symvers:
module-dirs := $(addprefix _module_,$(KBUILD_EXTMOD))
PHONY += $(module-dirs) modules
$(module-dirs): prepare $(objtree)/Module.symvers
- $(Q)$(MAKE) $(build)=$(patsubst _module_%,%,$@)
+ $(Q)$(MAKE) $(build)=$(patsubst _module_%,%,$@) need-modorder=1
modules: $(module-dirs)
@$(kecho) ' Building modules, stage 2.';
@@ -1634,7 +1639,6 @@ PHONY += $(clean-dirs) clean
$(clean-dirs):
$(Q)$(MAKE) $(clean)=$(patsubst _clean_%,%,$@)
-clean: rm-dirs := $(MODVERDIR)
clean: rm-files := $(KBUILD_EXTMOD)/Module.symvers
PHONY += help
@@ -1648,8 +1652,6 @@ help:
@echo ''
PHONY += prepare
-prepare:
- $(cmd_crmodverdir)
endif # KBUILD_EXTMOD
clean: $(clean-dirs)
@@ -1660,7 +1662,7 @@ clean: $(clean-dirs)
-o -name '*.ko.*' \
-o -name '*.dtb' -o -name '*.dtb.S' -o -name '*.dt.yaml' \
-o -name '*.dwo' -o -name '*.lst' \
- -o -name '*.su' \
+ -o -name '*.su' -o -name '*.mod' \
-o -name '.*.d' -o -name '.*.tmp' -o -name '*.mod.c' \
-o -name '*.lex.c' -o -name '*.tab.[ch]' \
-o -name '*.asn1.[ch]' \
@@ -1765,8 +1767,6 @@ build-dir = $(patsubst %/,%,$(dir $(build-target)))
$(Q)$(MAKE) $(build)=$(build-dir) $(build-target)
%.symtypes: prepare FORCE
$(Q)$(MAKE) $(build)=$(build-dir) $(build-target)
-%.ko: %.o
- $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost
# Modules
PHONY += /
@@ -1789,11 +1789,6 @@ quiet_cmd_depmod = DEPMOD $(KERNELRELEASE)
cmd_depmod = $(CONFIG_SHELL) $(srctree)/scripts/depmod.sh $(DEPMOD) \
$(KERNELRELEASE)
-# Create temporary dir for module support files
-# clean it up only when building all modules
-cmd_crmodverdir = $(Q)mkdir -p $(MODVERDIR) \
- $(if $(KBUILD_MODULES),; rm -f $(MODVERDIR)/*)
-
# read saved command lines for existing targets
existing-targets := $(wildcard $(sort $(targets)))
diff --git a/arch/Kconfig b/arch/Kconfig
index ac0fba400ded..a7b57dd42c26 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -796,6 +796,9 @@ config ARCH_NO_COHERENT_DMA_MMAP
config ARCH_NO_PREEMPT
bool
+config ARCH_SUPPORTS_RT
+ bool
+
config CPU_NO_EFFICIENT_FFS
def_bool n
diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile
index ca85df247775..87b7769214e0 100644
--- a/arch/arm/vdso/Makefile
+++ b/arch/arm/vdso/Makefile
@@ -13,8 +13,7 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING
ldflags-$(CONFIG_CPU_ENDIAN_BE8) := --be8
ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \
-z max-page-size=4096 -nostdlib -shared $(ldflags-y) \
- $(call ld-option, --hash-style=sysv) \
- $(call ld-option, --build-id) \
+ --hash-style=sysv --build-id \
-T
obj-$(CONFIG_VDSO) += vdso.o
diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile
index 288c14d30b45..60a4c6239712 100644
--- a/arch/arm64/kernel/vdso32/Makefile
+++ b/arch/arm64/kernel/vdso32/Makefile
@@ -96,8 +96,8 @@ VDSO_LDFLAGS := $(VDSO_CPPFLAGS)
VDSO_LDFLAGS += -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1
VDSO_LDFLAGS += -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
VDSO_LDFLAGS += -nostdlib -shared -mfloat-abi=soft
-VDSO_LDFLAGS += $(call cc32-ldoption,-Wl$(comma)--hash-style=sysv)
-VDSO_LDFLAGS += $(call cc32-ldoption,-Wl$(comma)--build-id)
+VDSO_LDFLAGS += -Wl,--hash-style=sysv
+VDSO_LDFLAGS += -Wl,--build-id
VDSO_LDFLAGS += $(call cc32-ldoption,-fuse-ld=bfd)
diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h
index 3c9e1bd9a3e9..d6544dc71258 100644
--- a/arch/hexagon/include/asm/pgalloc.h
+++ b/arch/hexagon/include/asm/pgalloc.h
@@ -11,6 +11,8 @@
#include <asm/mem-layout.h>
#include <asm/atomic.h>
+#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
+
#define check_pgt_cache() do {} while (0)
extern unsigned long long kmap_generation;
@@ -46,38 +48,6 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
free_page((unsigned long) pgd);
}
-static inline struct page *pte_alloc_one(struct mm_struct *mm)
-{
- struct page *pte;
-
- pte = alloc_page(GFP_KERNEL | __GFP_ZERO);
- if (!pte)
- return NULL;
- if (!pgtable_page_ctor(pte)) {
- __free_page(pte);
- return NULL;
- }
- return pte;
-}
-
-/* _kernel variant gets to use a different allocator */
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
- gfp_t flags = GFP_KERNEL | __GFP_ZERO;
- return (pte_t *) __get_free_page(flags);
-}
-
-static inline void pte_free(struct mm_struct *mm, struct page *pte)
-{
- pgtable_page_dtor(pte);
- __free_page(pte);
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- free_page((unsigned long)pte);
-}
-
static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
pgtable_t pte)
{
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index f8b3b07e4247..7a117be8297c 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -34,8 +34,6 @@ else
KBUILD_LDFLAGS += -melf32lriscv
endif
-KBUILD_CFLAGS += -Wall
-
# ISA string setting
riscv-march-$(CONFIG_ARCH_RV32I) := rv32ima
riscv-march-$(CONFIG_ARCH_RV64I) := rv64ima
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 5d8570ed6cab..a4ad2733eedf 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -189,6 +189,7 @@ config S390
select VIRT_CPU_ACCOUNTING
select ARCH_HAS_SCALED_CPUTIME
select HAVE_NMI
+ select ARCH_HAS_FORCE_DMA_UNENCRYPTED
select SWIOTLB
select GENERIC_ALLOCATOR
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 9dde4d7d8704..b5fd6e85657c 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1224,28 +1224,11 @@ no_timer:
void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
{
- /*
- * We cannot move this into the if, as the CPU might be already
- * in kvm_vcpu_block without having the waitqueue set (polling)
- */
vcpu->valid_wakeup = true;
+ kvm_vcpu_wake_up(vcpu);
+
/*
- * This is mostly to document, that the read in swait_active could
- * be moved before other stores, leading to subtle races.
- * All current users do not store or use an atomic like update
- */
- smp_mb__after_atomic();
- if (swait_active(&vcpu->wq)) {
- /*
- * The vcpu gave up the cpu voluntarily, mark it as a good
- * yield-candidate.
- */
- vcpu->preempted = true;
- swake_up_one(&vcpu->wq);
- vcpu->stat.halt_wakeup++;
- }
- /*
- * The VCPU might not be sleeping but is executing the VSIE. Let's
+ * The VCPU might not be sleeping but rather executing VSIE. Let's
* kick it, so it leaves the SIE to process the request.
*/
kvm_s390_vsie_kick(vcpu);
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 4e5bbe328594..20340a03ad90 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -30,7 +30,7 @@
#include <linux/export.h>
#include <linux/cma.h>
#include <linux/gfp.h>
-#include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
#include <asm/processor.h>
#include <linux/uaccess.h>
#include <asm/pgtable.h>
@@ -161,6 +161,11 @@ bool sev_active(void)
return is_prot_virt_guest();
}
+bool force_dma_unencrypted(struct device *dev)
+{
+ return sev_active();
+}
+
/* protected virtualization */
static void pv_init(void)
{
diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile
index 5a9e4e1f9f81..324a23947585 100644
--- a/arch/sparc/vdso/Makefile
+++ b/arch/sparc/vdso/Makefile
@@ -115,8 +115,7 @@ quiet_cmd_vdso = VDSO $@
-T $(filter %.lds,$^) $(filter %.o,$^) && \
sh $(srctree)/$(src)/checkundef.sh '$(OBJDUMP)' '$@'
-VDSO_LDFLAGS = -shared $(call ld-option, --hash-style=both) \
- $(call ld-option, --build-id) -Bsymbolic
+VDSO_LDFLAGS = -shared --hash-style=both --build-id -Bsymbolic
GCOV_PROFILE := n
#
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 78772870facd..222855cc0158 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1526,6 +1526,7 @@ config AMD_MEM_ENCRYPT
depends on X86_64 && CPU_SUP_AMD
select DYNAMIC_PHYSICAL_MASK
select ARCH_USE_MEMREMAP_PROT
+ select ARCH_HAS_FORCE_DMA_UNENCRYPTED
---help---
Say yes to enable support for the encryption of system memory.
This requires an AMD processor that supports Secure Memory
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 220d1279d0e2..d6662fdef300 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -384,14 +384,11 @@ struct boot_params *make_boot_params(struct efi_config *c)
struct apm_bios_info *bi;
struct setup_header *hdr;
efi_loaded_image_t *image;
- void *options, *handle;
+ void *handle;
efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID;
int options_size = 0;
efi_status_t status;
char *cmdline_ptr;
- u16 *s2;
- u8 *s1;
- int i;
unsigned long ramdisk_addr;
unsigned long ramdisk_size;
@@ -494,8 +491,6 @@ static void add_e820ext(struct boot_params *params,
struct setup_data *e820ext, u32 nr_entries)
{
struct setup_data *data;
- efi_status_t status;
- unsigned long size;
e820ext->type = SETUP_E820_EXT;
e820ext->len = nr_entries * sizeof(struct boot_e820_entry);
@@ -677,8 +672,6 @@ static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg,
void *priv)
{
const char *signature;
- __u32 nr_desc;
- efi_status_t status;
struct exit_boot_struct *p = priv;
signature = efi_is_64bit() ? EFI64_LOADER_SIGNATURE
@@ -747,7 +740,6 @@ struct boot_params *
efi_main(struct efi_config *c, struct boot_params *boot_params)
{
struct desc_ptr *gdt = NULL;
- efi_loaded_image_t *image;
struct setup_header *hdr = &boot_params->hdr;
efi_status_t status;
struct desc_struct *desc;
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 24e65a0f756d..53ac0cb2396d 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -17,6 +17,7 @@
#include "pgtable.h"
#include "../string.h"
#include "../voffset.h"
+#include <asm/bootparam_utils.h>
/*
* WARNING!!
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index d2f184165934..c8181392f70d 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -23,7 +23,6 @@
#include <asm/page.h>
#include <asm/boot.h>
#include <asm/bootparam.h>
-#include <asm/bootparam_utils.h>
#define BOOT_CTYPE_H
#include <linux/acpi.h>
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index f8debf7aeb4c..5f2d03067ae5 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -40,7 +40,6 @@ int cmdline_find_option_bool(const char *option);
static unsigned long find_trampoline_placement(void)
{
unsigned long bios_start = 0, ebda_start = 0;
- unsigned long trampoline_start;
struct boot_e820_entry *entry;
char *signature;
int i;
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 9f1f9e3b8230..830bd984182b 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -343,3 +343,9 @@ For 32-bit we have the following conventions - kernel is built with
.Lafter_call_\@:
#endif
.endm
+
+#ifdef CONFIG_PARAVIRT_XXL
+#define GET_CR2_INTO(reg) GET_CR2_INTO_AX ; _ASM_MOV %_ASM_AX, reg
+#else
+#define GET_CR2_INTO(reg) _ASM_MOV %cr2, reg
+#endif
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 90b473297299..2bb986f305ac 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -294,9 +294,11 @@
.Lfinished_frame_\@:
.endm
-.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0
+.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0
cld
+.if \skip_gs == 0
PUSH_GS
+.endif
FIXUP_FRAME
pushl %fs
pushl %es
@@ -313,13 +315,13 @@
movl %edx, %es
movl $(__KERNEL_PERCPU), %edx
movl %edx, %fs
+.if \skip_gs == 0
SET_KERNEL_GS %edx
-
+.endif
/* Switch to kernel stack if necessary */
.if \switch_stacks > 0
SWITCH_TO_KERNEL_STACK
.endif
-
.endm
.macro SAVE_ALL_NMI cr3_reg:req
@@ -1441,39 +1443,46 @@ BUILD_INTERRUPT3(hv_stimer0_callback_vector, HYPERV_STIMER0_VECTOR,
ENTRY(page_fault)
ASM_CLAC
- pushl $do_page_fault
- ALIGN
- jmp common_exception
+ pushl $0; /* %gs's slot on the stack */
+
+ SAVE_ALL switch_stacks=1 skip_gs=1
+
+ ENCODE_FRAME_POINTER
+ UNWIND_ESPFIX_STACK
+
+ /* fixup %gs */
+ GS_TO_REG %ecx
+ REG_TO_PTGS %ecx
+ SET_KERNEL_GS %ecx
+
+ GET_CR2_INTO(%ecx) # might clobber %eax
+
+ /* fixup orig %eax */
+ movl PT_ORIG_EAX(%esp), %edx # get the error code
+ movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
+
+ TRACE_IRQS_OFF
+ movl %esp, %eax # pt_regs pointer
+ call do_page_fault
+ jmp ret_from_exception
END(page_fault)
common_exception:
/* the function address is in %gs's slot on the stack */
- FIXUP_FRAME
- pushl %fs
- pushl %es
- pushl %ds
- pushl %eax
- movl $(__USER_DS), %eax
- movl %eax, %ds
- movl %eax, %es
- movl $(__KERNEL_PERCPU), %eax
- movl %eax, %fs
- pushl %ebp
- pushl %edi
- pushl %esi
- pushl %edx
- pushl %ecx
- pushl %ebx
- SWITCH_TO_KERNEL_STACK
+ SAVE_ALL switch_stacks=1 skip_gs=1
ENCODE_FRAME_POINTER
- cld
UNWIND_ESPFIX_STACK
+
+ /* fixup %gs */
GS_TO_REG %ecx
movl PT_GS(%esp), %edi # get the function address
- movl PT_ORIG_EAX(%esp), %edx # get the error code
- movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
REG_TO_PTGS %ecx
SET_KERNEL_GS %ecx
+
+ /* fixup orig %eax */
+ movl PT_ORIG_EAX(%esp), %edx # get the error code
+ movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
+
TRACE_IRQS_OFF
movl %esp, %eax # pt_regs pointer
CALL_NOSPEC %edi
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 35a66fcfcb91..3f5a978a02a7 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -864,18 +864,84 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
*/
#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + (x) * 8)
+.macro idtentry_part do_sym, has_error_code:req, read_cr2:req, paranoid:req, shift_ist=-1, ist_offset=0
+
+ .if \paranoid
+ call paranoid_entry
+ /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */
+ .else
+ call error_entry
+ .endif
+ UNWIND_HINT_REGS
+
+ .if \read_cr2
+ /*
+ * Store CR2 early so subsequent faults cannot clobber it. Use R12 as
+ * intermediate storage as RDX can be clobbered in enter_from_user_mode().
+ * GET_CR2_INTO can clobber RAX.
+ */
+ GET_CR2_INTO(%r12);
+ .endif
+
+ .if \shift_ist != -1
+ TRACE_IRQS_OFF_DEBUG /* reload IDT in case of recursion */
+ .else
+ TRACE_IRQS_OFF
+ .endif
+
+ .if \paranoid == 0
+ testb $3, CS(%rsp)
+ jz .Lfrom_kernel_no_context_tracking_\@
+ CALL_enter_from_user_mode
+.Lfrom_kernel_no_context_tracking_\@:
+ .endif
+
+ movq %rsp, %rdi /* pt_regs pointer */
+
+ .if \has_error_code
+ movq ORIG_RAX(%rsp), %rsi /* get error code */
+ movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */
+ .else
+ xorl %esi, %esi /* no error code */
+ .endif
+
+ .if \shift_ist != -1
+ subq $\ist_offset, CPU_TSS_IST(\shift_ist)
+ .endif
+
+ .if \read_cr2
+ movq %r12, %rdx /* Move CR2 into 3rd argument */
+ .endif
+
+ call \do_sym
+
+ .if \shift_ist != -1
+ addq $\ist_offset, CPU_TSS_IST(\shift_ist)
+ .endif
+
+ .if \paranoid
+ /* this procedure expect "no swapgs" flag in ebx */
+ jmp paranoid_exit
+ .else
+ jmp error_exit
+ .endif
+
+.endm
+
/**
* idtentry - Generate an IDT entry stub
* @sym: Name of the generated entry point
- * @do_sym: C function to be called
- * @has_error_code: True if this IDT vector has an error code on the stack
- * @paranoid: non-zero means that this vector may be invoked from
+ * @do_sym: C function to be called
+ * @has_error_code: True if this IDT vector has an error code on the stack
+ * @paranoid: non-zero means that this vector may be invoked from
* kernel mode with user GSBASE and/or user CR3.
* 2 is special -- see below.
* @shift_ist: Set to an IST index if entries from kernel mode should
- * decrement the IST stack so that nested entries get a
+ * decrement the IST stack so that nested entries get a
* fresh stack. (This is for #DB, which has a nasty habit
- * of recursing.)
+ * of recursing.)
+ * @create_gap: create a 6-word stack gap when coming from kernel mode.
+ * @read_cr2: load CR2 into the 3rd argument; done before calling any C code
*
* idtentry generates an IDT stub that sets up a usable kernel context,
* creates struct pt_regs, and calls @do_sym. The stub has the following
@@ -900,15 +966,19 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
* @paranoid == 2 is special: the stub will never switch stacks. This is for
* #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS.
*/
-.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 create_gap=0
+.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 create_gap=0 read_cr2=0
ENTRY(\sym)
UNWIND_HINT_IRET_REGS offset=\has_error_code*8
/* Sanity check */
- .if \shift_ist != -1 && \paranoid == 0
+ .if \shift_ist != -1 && \paranoid != 1
.error "using shift_ist requires paranoid=1"
.endif
+ .if \create_gap && \paranoid
+ .error "using create_gap requires paranoid=0"
+ .endif
+
ASM_CLAC
.if \has_error_code == 0
@@ -934,47 +1004,7 @@ ENTRY(\sym)
.Lfrom_usermode_no_gap_\@:
.endif
- .if \paranoid
- call paranoid_entry
- .else
- call error_entry
- .endif
- UNWIND_HINT_REGS
- /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */
-
- .if \paranoid
- .if \shift_ist != -1
- TRACE_IRQS_OFF_DEBUG /* reload IDT in case of recursion */
- .else
- TRACE_IRQS_OFF
- .endif
- .endif
-
- movq %rsp, %rdi /* pt_regs pointer */
-
- .if \has_error_code
- movq ORIG_RAX(%rsp), %rsi /* get error code */
- movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */
- .else
- xorl %esi, %esi /* no error code */
- .endif
-
- .if \shift_ist != -1
- subq $\ist_offset, CPU_TSS_IST(\shift_ist)
- .endif
-
- call \do_sym
-
- .if \shift_ist != -1
- addq $\ist_offset, CPU_TSS_IST(\shift_ist)
- .endif
-
- /* these procedures expect "no swapgs" flag in ebx */
- .if \paranoid
- jmp paranoid_exit
- .else
- jmp error_exit
- .endif
+ idtentry_part \do_sym, \has_error_code, \read_cr2, \paranoid, \shift_ist, \ist_offset
.if \paranoid == 1
/*
@@ -983,21 +1013,9 @@ ENTRY(\sym)
* run in real process context if user_mode(regs).
*/
.Lfrom_usermode_switch_stack_\@:
- call error_entry
-
- movq %rsp, %rdi /* pt_regs pointer */
-
- .if \has_error_code
- movq ORIG_RAX(%rsp), %rsi /* get error code */
- movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */
- .else
- xorl %esi, %esi /* no error code */
+ idtentry_part \do_sym, \has_error_code, \read_cr2, paranoid=0
.endif
- call \do_sym
-
- jmp error_exit
- .endif
_ASM_NOKPROBE(\sym)
END(\sym)
.endm
@@ -1007,7 +1025,7 @@ idtentry overflow do_overflow has_error_code=0
idtentry bounds do_bounds has_error_code=0
idtentry invalid_op do_invalid_op has_error_code=0
idtentry device_not_available do_device_not_available has_error_code=0
-idtentry double_fault do_double_fault has_error_code=1 paranoid=2
+idtentry double_fault do_double_fault has_error_code=1 paranoid=2 read_cr2=1
idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0
idtentry invalid_TSS do_invalid_TSS has_error_code=1
idtentry segment_not_present do_segment_not_present has_error_code=1
@@ -1179,10 +1197,10 @@ idtentry xendebug do_debug has_error_code=0
#endif
idtentry general_protection do_general_protection has_error_code=1
-idtentry page_fault do_page_fault has_error_code=1
+idtentry page_fault do_page_fault has_error_code=1 read_cr2=1
#ifdef CONFIG_KVM_GUEST
-idtentry async_page_fault do_async_page_fault has_error_code=1
+idtentry async_page_fault do_async_page_fault has_error_code=1 read_cr2=1
#endif
#ifdef CONFIG_X86_MCE
@@ -1281,18 +1299,9 @@ ENTRY(error_entry)
movq %rax, %rsp /* switch stack */
ENCODE_FRAME_POINTER
pushq %r12
-
- /*
- * We need to tell lockdep that IRQs are off. We can't do this until
- * we fix gsbase, and we should do it before enter_from_user_mode
- * (which can take locks).
- */
- TRACE_IRQS_OFF
- CALL_enter_from_user_mode
ret
.Lerror_entry_done:
- TRACE_IRQS_OFF
ret
/*
diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S
index cfdca8b42c70..cc20465b2867 100644
--- a/arch/x86/entry/thunk_64.S
+++ b/arch/x86/entry/thunk_64.S
@@ -12,9 +12,7 @@
/* rdi: arg1 ... normal C conventions. rax is saved/restored. */
.macro THUNK name, func, put_ret_addr_in_rdi=0
- .globl \name
- .type \name, @function
-\name:
+ ENTRY(\name)
pushq %rbp
movq %rsp, %rbp
@@ -35,6 +33,7 @@
call \func
jmp .L_restore
+ ENDPROC(\name)
_ASM_NOKPROBE(\name)
.endm
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 34773395139a..8df549138193 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -176,9 +176,8 @@ quiet_cmd_vdso = VDSO $@
-T $(filter %.lds,$^) $(filter %.o,$^) && \
sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'
-VDSO_LDFLAGS = -shared $(call ld-option, --hash-style=both) \
- $(call ld-option, --build-id) $(call ld-option, --eh-frame-hdr) \
- -Bsymbolic
+VDSO_LDFLAGS = -shared --hash-style=both --build-id \
+ $(call ld-option, --eh-frame-hdr) -Bsymbolic
GCOV_PROFILE := n
quiet_cmd_vdso_and_check = VDSO $@
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 0e033ef11a9f..0d258688c8cf 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -60,8 +60,17 @@ static int hv_cpu_init(unsigned int cpu)
if (!hv_vp_assist_page)
return 0;
- if (!*hvp)
- *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL);
+ /*
+ * The VP ASSIST PAGE is an "overlay" page (see Hyper-V TLFS's Section
+ * 5.2.1 "GPA Overlay Pages"). Here it must be zeroed out to make sure
+ * we always write the EOI MSR in hv_apic_eoi_write() *after* the
+ * EOI optimization is disabled in hv_cpu_die(), otherwise a CPU may
+ * not be stopped in the case of CPU offlining and the VM will hang.
+ */
+ if (!*hvp) {
+ *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO,
+ PAGE_KERNEL);
+ }
if (*hvp) {
u64 val;
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 050e5f9ebf81..e647aa095867 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -49,7 +49,7 @@ static inline void generic_apic_probe(void)
#ifdef CONFIG_X86_LOCAL_APIC
-extern unsigned int apic_verbosity;
+extern int apic_verbosity;
extern int local_apic_timer_c2_ok;
extern int disable_apic;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0cc5b611a113..8282b8d41209 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1496,25 +1496,29 @@ enum {
#define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
#define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
+asmlinkage void __noreturn kvm_spurious_fault(void);
+
/*
* Hardware virtualization extension instructions may fault if a
* reboot turns off virtualization while processes are running.
- * Trap the fault and ignore the instruction if that happens.
+ * Usually after catching the fault we just panic; during reboot
+ * instead the instruction is ignored.
*/
-asmlinkage void kvm_spurious_fault(void);
-
-#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \
- "666: " insn "\n\t" \
- "668: \n\t" \
- ".pushsection .fixup, \"ax\" \n" \
- "667: \n\t" \
- cleanup_insn "\n\t" \
- "cmpb $0, kvm_rebooting \n\t" \
- "jne 668b \n\t" \
- __ASM_SIZE(push) " $666b \n\t" \
- "jmp kvm_spurious_fault \n\t" \
- ".popsection \n\t" \
- _ASM_EXTABLE(666b, 667b)
+#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \
+ "666: \n\t" \
+ insn "\n\t" \
+ "jmp 668f \n\t" \
+ "667: \n\t" \
+ "call kvm_spurious_fault \n\t" \
+ "668: \n\t" \
+ ".pushsection .fixup, \"ax\" \n\t" \
+ "700: \n\t" \
+ cleanup_insn "\n\t" \
+ "cmpb $0, kvm_rebooting\n\t" \
+ "je 667b \n\t" \
+ "jmp 668b \n\t" \
+ ".popsection \n\t" \
+ _ASM_EXTABLE(666b, 700b)
#define __kvm_handle_fault_on_reboot(insn) \
____kvm_handle_fault_on_reboot(insn, "")
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 5ed3cf1c3934..9b4df6eaa11a 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -92,7 +92,7 @@ void kvm_async_pf_task_wait(u32 token, int interrupt_kernel);
void kvm_async_pf_task_wake(u32 token);
u32 kvm_read_and_reset_pf_reason(void);
extern void kvm_disable_steal_time(void);
-void do_async_page_fault(struct pt_regs *regs, unsigned long error_code);
+void do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address);
#ifdef CONFIG_PARAVIRT_SPINLOCKS
void __init kvm_spinlock_init(void);
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index c25c38a05c1c..dce26f1d13e1 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -116,7 +116,7 @@ static inline void write_cr0(unsigned long x)
static inline unsigned long read_cr2(void)
{
- return PVOP_CALL0(unsigned long, mmu.read_cr2);
+ return PVOP_CALLEE0(unsigned long, mmu.read_cr2);
}
static inline void write_cr2(unsigned long x)
@@ -746,6 +746,7 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
PV_RESTORE_ALL_CALLER_REGS \
FRAME_END \
"ret;" \
+ ".size " PV_THUNK_NAME(func) ", .-" PV_THUNK_NAME(func) ";" \
".popsection")
/* Get a reference to a callee-save function */
@@ -909,13 +910,7 @@ extern void default_banner(void);
ANNOTATE_RETPOLINE_SAFE; \
call PARA_INDIRECT(pv_ops+PV_CPU_swapgs); \
)
-#endif
-
-#define GET_CR2_INTO_RAX \
- ANNOTATE_RETPOLINE_SAFE; \
- call PARA_INDIRECT(pv_ops+PV_MMU_read_cr2);
-#ifdef CONFIG_PARAVIRT_XXL
#define USERGS_SYSRET64 \
PARA_SITE(PARA_PATCH(PV_CPU_usergs_sysret64), \
ANNOTATE_RETPOLINE_SAFE; \
@@ -929,9 +924,19 @@ extern void default_banner(void);
call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl); \
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
#endif
-#endif
+#endif /* CONFIG_PARAVIRT_XXL */
+#endif /* CONFIG_X86_64 */
+
+#ifdef CONFIG_PARAVIRT_XXL
+
+#define GET_CR2_INTO_AX \
+ PARA_SITE(PARA_PATCH(PV_MMU_read_cr2), \
+ ANNOTATE_RETPOLINE_SAFE; \
+ call PARA_INDIRECT(pv_ops+PV_MMU_read_cr2); \
+ )
+
+#endif /* CONFIG_PARAVIRT_XXL */
-#endif /* CONFIG_X86_32 */
#endif /* __ASSEMBLY__ */
#else /* CONFIG_PARAVIRT */
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 946f8f1f1efc..639b2df445ee 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -220,7 +220,7 @@ struct pv_mmu_ops {
void (*exit_mmap)(struct mm_struct *mm);
#ifdef CONFIG_PARAVIRT_XXL
- unsigned long (*read_cr2)(void);
+ struct paravirt_callee_save read_cr2;
void (*write_cr2)(unsigned long);
unsigned long (*read_cr3)(void);
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index f2bd284abc16..b25e633033c3 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -74,14 +74,14 @@ dotraplinkage void do_invalid_TSS(struct pt_regs *regs, long error_code);
dotraplinkage void do_segment_not_present(struct pt_regs *regs, long error_code);
dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code);
#ifdef CONFIG_X86_64
-dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code);
+dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long address);
asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs);
asmlinkage __visible notrace
struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s);
void __init trap_init(void);
#endif
dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code);
-dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code);
+dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address);
dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code);
dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code);
dotraplinkage void do_alignment_check(struct pt_regs *regs, long error_code);
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index e901b0ab116f..503d3f42da16 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -435,9 +435,12 @@ struct kvm_nested_state {
/* for KVM_CAP_PMU_EVENT_FILTER */
struct kvm_pmu_event_filter {
- __u32 action;
- __u32 nevents;
- __u64 events[0];
+ __u32 action;
+ __u32 nevents;
+ __u32 fixed_counter_bitmap;
+ __u32 flags;
+ __u32 pad[4];
+ __u64 events[0];
};
#define KVM_PMU_EVENT_ALLOW 0
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 1bd91cb7b320..f5291362da1a 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -183,7 +183,7 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
/*
* Debug level, exported for io_apic.c
*/
-unsigned int apic_verbosity;
+int apic_verbosity;
int pic_mode;
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index da64452584b0..5c7ee3df4d0b 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -76,6 +76,7 @@ static void __used common(void)
BLANK();
OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
+ OFFSET(XEN_vcpu_info_arch_cr2, vcpu_info, arch.cr2);
#endif
BLANK();
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index e69408bf664b..7da2bcd2b8eb 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -86,9 +86,9 @@ static bool _e820__mapped_any(struct e820_table *table,
continue;
if (entry->addr >= end || entry->addr + entry->size <= start)
continue;
- return 1;
+ return true;
}
- return 0;
+ return false;
}
bool e820__mapped_raw_any(u64 start, u64 end, enum e820_type type)
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index bcd206c8ac90..a6342c899be5 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -29,9 +29,7 @@
#ifdef CONFIG_PARAVIRT_XXL
#include <asm/asm-offsets.h>
#include <asm/paravirt.h>
-#define GET_CR2_INTO(reg) GET_CR2_INTO_RAX ; movq %rax, reg
#else
-#define GET_CR2_INTO(reg) movq %cr2, reg
#define INTERRUPT_RETURN iretq
#endif
@@ -253,10 +251,10 @@ END(secondary_startup_64)
* start_secondary() via .Ljump_to_C_code.
*/
ENTRY(start_cpu0)
- movq initial_stack(%rip), %rsp
UNWIND_HINT_EMPTY
+ movq initial_stack(%rip), %rsp
jmp .Ljump_to_C_code
-ENDPROC(start_cpu0)
+END(start_cpu0)
#endif
/* Both SMP bootup and ACPI suspend change these variables */
@@ -323,7 +321,7 @@ early_idt_handler_common:
cmpq $14,%rsi /* Page fault? */
jnz 10f
- GET_CR2_INTO(%rdi) /* Can clobber any volatile register if pv */
+ GET_CR2_INTO(%rdi) /* can clobber %rax if pv */
call early_make_pgtable
andl %eax,%eax
jz 20f /* All good */
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 82caf01b63dd..b7f34fe2171e 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -242,23 +242,23 @@ EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason);
NOKPROBE_SYMBOL(kvm_read_and_reset_pf_reason);
dotraplinkage void
-do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
+do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address)
{
enum ctx_state prev_state;
switch (kvm_read_and_reset_pf_reason()) {
default:
- do_page_fault(regs, error_code);
+ do_page_fault(regs, error_code, address);
break;
case KVM_PV_REASON_PAGE_NOT_PRESENT:
/* page is swapped out by the host. */
prev_state = exception_enter();
- kvm_async_pf_task_wait((u32)read_cr2(), !user_mode(regs));
+ kvm_async_pf_task_wait((u32)address, !user_mode(regs));
exception_exit(prev_state);
break;
case KVM_PV_REASON_PAGE_READY:
rcu_irq_enter();
- kvm_async_pf_task_wake((u32)read_cr2());
+ kvm_async_pf_task_wake((u32)address);
rcu_irq_exit();
break;
}
@@ -838,6 +838,7 @@ asm(
"cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);"
"setne %al;"
"ret;"
+".size __raw_callee_save___kvm_vcpu_is_preempted, .-__raw_callee_save___kvm_vcpu_is_preempted;"
".popsection");
#endif
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 1bfe5c6e6cfe..afac7ccce72f 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -546,17 +546,15 @@ void __init default_get_smp_config(unsigned int early)
* local APIC has default address
*/
mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
- return;
+ goto out;
}
pr_info("Default MP configuration #%d\n", mpf->feature1);
construct_default_ISA_mptable(mpf->feature1);
} else if (mpf->physptr) {
- if (check_physptr(mpf, early)) {
- early_memunmap(mpf, sizeof(*mpf));
- return;
- }
+ if (check_physptr(mpf, early))
+ goto out;
} else
BUG();
@@ -565,7 +563,7 @@ void __init default_get_smp_config(unsigned int early)
/*
* Only use the first configuration found.
*/
-
+out:
early_memunmap(mpf, sizeof(*mpf));
}
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 98039d7fb998..0aa6256eedd8 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -370,7 +370,7 @@ struct paravirt_patch_template pv_ops = {
.mmu.exit_mmap = paravirt_nop,
#ifdef CONFIG_PARAVIRT_XXL
- .mmu.read_cr2 = native_read_cr2,
+ .mmu.read_cr2 = __PV_IS_CALLEE_SAVE(native_read_cr2),
.mmu.write_cr2 = native_write_cr2,
.mmu.read_cr3 = __native_read_cr3,
.mmu.write_cr3 = native_write_cr3,
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 250e4c4ac6d9..af64519b2695 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -143,17 +143,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
void release_thread(struct task_struct *dead_task)
{
- if (dead_task->mm) {
-#ifdef CONFIG_MODIFY_LDT_SYSCALL
- if (dead_task->mm->context.ldt) {
- pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
- dead_task->comm,
- dead_task->mm->context.ldt->entries,
- dead_task->mm->context.ldt->nr_entries);
- BUG();
- }
-#endif
- }
+ WARN_ON(dead_task->mm);
}
enum which_selector {
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 71691a8310e7..0fdbe89d0754 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -369,12 +369,22 @@ static int putreg(struct task_struct *child,
case offsetof(struct user_regs_struct,fs_base):
if (value >= TASK_SIZE_MAX)
return -EIO;
- x86_fsbase_write_task(child, value);
+ /*
+ * When changing the FS base, use do_arch_prctl_64()
+ * to set the index to zero and to set the base
+ * as requested.
+ */
+ if (child->thread.fsbase != value)
+ return do_arch_prctl_64(child, ARCH_SET_FS, value);
return 0;
case offsetof(struct user_regs_struct,gs_base):
+ /*
+ * Exactly the same here as the %fs handling above.
+ */
if (value >= TASK_SIZE_MAX)
return -EIO;
- x86_gsbase_write_task(child, value);
+ if (child->thread.gsbase != value)
+ return do_arch_prctl_64(child, ARCH_SET_GS, value);
return 0;
#endif
}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 87095a477154..4bb0f8447112 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -313,13 +313,10 @@ __visible void __noreturn handle_stack_overflow(const char *message,
#ifdef CONFIG_X86_64
/* Runs on IST stack */
-dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
+dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2)
{
static const char str[] = "double fault";
struct task_struct *tsk = current;
-#ifdef CONFIG_VMAP_STACK
- unsigned long cr2;
-#endif
#ifdef CONFIG_X86_ESPFIX64
extern unsigned char native_irq_return_iret[];
@@ -415,7 +412,6 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
* stack even if the actual trigger for the double fault was
* something else.
*/
- cr2 = read_cr2();
if ((unsigned long)task_stack_page(tsk) - 1 - cr2 < PAGE_SIZE)
handle_stack_overflow("kernel stack overflow (double-fault)", regs, cr2);
#endif
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index ead681210306..22c2720cd948 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -368,9 +368,13 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
F(MD_CLEAR);
+ /* cpuid 7.1.eax */
+ const u32 kvm_cpuid_7_1_eax_x86_features =
+ F(AVX512_BF16);
+
switch (index) {
case 0:
- entry->eax = 0;
+ entry->eax = min(entry->eax, 1u);
entry->ebx &= kvm_cpuid_7_0_ebx_x86_features;
cpuid_mask(&entry->ebx, CPUID_7_0_EBX);
/* TSC_ADJUST is emulated */
@@ -394,6 +398,12 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
*/
entry->edx |= F(ARCH_CAPABILITIES);
break;
+ case 1:
+ entry->eax &= kvm_cpuid_7_1_eax_x86_features;
+ entry->ebx = 0;
+ entry->ecx = 0;
+ entry->edx = 0;
+ break;
default:
WARN_ON_ONCE(1);
entry->eax = 0;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 8e409ad448f9..718f7d9afedc 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -312,29 +312,42 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
-#define FOP_FUNC(name) \
+#define __FOP_FUNC(name) \
".align " __stringify(FASTOP_SIZE) " \n\t" \
".type " name ", @function \n\t" \
name ":\n\t"
-#define FOP_RET "ret \n\t"
+#define FOP_FUNC(name) \
+ __FOP_FUNC(#name)
+
+#define __FOP_RET(name) \
+ "ret \n\t" \
+ ".size " name ", .-" name "\n\t"
+
+#define FOP_RET(name) \
+ __FOP_RET(#name)
#define FOP_START(op) \
extern void em_##op(struct fastop *fake); \
asm(".pushsection .text, \"ax\" \n\t" \
".global em_" #op " \n\t" \
- FOP_FUNC("em_" #op)
+ ".align " __stringify(FASTOP_SIZE) " \n\t" \
+ "em_" #op ":\n\t"
#define FOP_END \
".popsection")
+#define __FOPNOP(name) \
+ __FOP_FUNC(name) \
+ __FOP_RET(name)
+
#define FOPNOP() \
- FOP_FUNC(__stringify(__UNIQUE_ID(nop))) \
- FOP_RET
+ __FOPNOP(__stringify(__UNIQUE_ID(nop)))
#define FOP1E(op, dst) \
- FOP_FUNC(#op "_" #dst) \
- "10: " #op " %" #dst " \n\t" FOP_RET
+ __FOP_FUNC(#op "_" #dst) \
+ "10: " #op " %" #dst " \n\t" \
+ __FOP_RET(#op "_" #dst)
#define FOP1EEX(op, dst) \
FOP1E(op, dst) _ASM_EXTABLE(10b, kvm_fastop_exception)
@@ -366,8 +379,9 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
FOP_END
#define FOP2E(op, dst, src) \
- FOP_FUNC(#op "_" #dst "_" #src) \
- #op " %" #src ", %" #dst " \n\t" FOP_RET
+ __FOP_FUNC(#op "_" #dst "_" #src) \
+ #op " %" #src ", %" #dst " \n\t" \
+ __FOP_RET(#op "_" #dst "_" #src)
#define FASTOP2(op) \
FOP_START(op) \
@@ -405,8 +419,9 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
FOP_END
#define FOP3E(op, dst, src, src2) \
- FOP_FUNC(#op "_" #dst "_" #src "_" #src2) \
- #op " %" #src2 ", %" #src ", %" #dst " \n\t" FOP_RET
+ __FOP_FUNC(#op "_" #dst "_" #src "_" #src2) \
+ #op " %" #src2 ", %" #src ", %" #dst " \n\t"\
+ __FOP_RET(#op "_" #dst "_" #src "_" #src2)
/* 3-operand, word-only, src2=cl */
#define FASTOP3WCL(op) \
@@ -423,7 +438,7 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
".type " #op ", @function \n\t" \
#op ": \n\t" \
#op " %al \n\t" \
- FOP_RET
+ __FOP_RET(#op)
asm(".pushsection .fixup, \"ax\"\n"
".global kvm_fastop_exception \n"
@@ -449,7 +464,10 @@ FOP_SETCC(setle)
FOP_SETCC(setnle)
FOP_END;
-FOP_START(salc) "pushf; sbb %al, %al; popf \n\t" FOP_RET
+FOP_START(salc)
+FOP_FUNC(salc)
+"pushf; sbb %al, %al; popf \n\t"
+FOP_RET(salc)
FOP_END;
/*
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index a39e38f13029..c10a8b10b203 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1594,7 +1594,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
{
u64 param, ingpa, outgpa, ret = HV_STATUS_SUCCESS;
uint16_t code, rep_idx, rep_cnt;
- bool fast, longmode, rep;
+ bool fast, rep;
/*
* hypercall generates UD from non zero cpl and real mode
@@ -1605,9 +1605,14 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
return 1;
}
- longmode = is_64_bit_mode(vcpu);
-
- if (!longmode) {
+#ifdef CONFIG_X86_64
+ if (is_64_bit_mode(vcpu)) {
+ param = kvm_rcx_read(vcpu);
+ ingpa = kvm_rdx_read(vcpu);
+ outgpa = kvm_r8_read(vcpu);
+ } else
+#endif
+ {
param = ((u64)kvm_rdx_read(vcpu) << 32) |
(kvm_rax_read(vcpu) & 0xffffffff);
ingpa = ((u64)kvm_rbx_read(vcpu) << 32) |
@@ -1615,13 +1620,6 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
outgpa = ((u64)kvm_rdi_read(vcpu) << 32) |
(kvm_rsi_read(vcpu) & 0xffffffff);
}
-#ifdef CONFIG_X86_64
- else {
- param = kvm_rcx_read(vcpu);
- ingpa = kvm_rdx_read(vcpu);
- outgpa = kvm_r8_read(vcpu);
- }
-#endif
code = param & 0xffff;
fast = !!(param & HV_HYPERCALL_FAST_BIT);
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 1add1bc881e2..d859ae8890d0 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -45,11 +45,6 @@
#include "lapic.h"
#include "irq.h"
-#if 0
-#define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg)
-#else
-#define ioapic_debug(fmt, arg...)
-#endif
static int ioapic_service(struct kvm_ioapic *vioapic, int irq,
bool line_status);
@@ -294,7 +289,6 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
default:
index = (ioapic->ioregsel - 0x10) >> 1;
- ioapic_debug("change redir index %x val %x\n", index, val);
if (index >= IOAPIC_NUM_PINS)
return;
e = &ioapic->redirtbl[index];
@@ -343,12 +337,6 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
entry->fields.remote_irr))
return -1;
- ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
- "vector=%x trig_mode=%x\n",
- entry->fields.dest_id, entry->fields.dest_mode,
- entry->fields.delivery_mode, entry->fields.vector,
- entry->fields.trig_mode);
-
irqe.dest_id = entry->fields.dest_id;
irqe.vector = entry->fields.vector;
irqe.dest_mode = entry->fields.dest_mode;
@@ -515,7 +503,6 @@ static int ioapic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
if (!ioapic_in_range(ioapic, addr))
return -EOPNOTSUPP;
- ioapic_debug("addr %lx\n", (unsigned long)addr);
ASSERT(!(addr & 0xf)); /* check alignment */
addr &= 0xff;
@@ -558,8 +545,6 @@ static int ioapic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
if (!ioapic_in_range(ioapic, addr))
return -EOPNOTSUPP;
- ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n",
- (void*)addr, len, val);
ASSERT(!(addr & 0xf)); /* check alignment */
switch (len) {
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index a232e76d8f23..0aa158657f20 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -52,9 +52,6 @@
#define PRIu64 "u"
#define PRIo64 "o"
-/* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
-#define apic_debug(fmt, arg...) do {} while (0)
-
/* 14 is the version for Xeon and Pentium 8.4.8*/
#define APIC_VERSION (0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16))
#define LAPIC_MMIO_LENGTH (1 << 12)
@@ -121,6 +118,17 @@ static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
return apic->vcpu->vcpu_id;
}
+bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
+{
+ return pi_inject_timer && kvm_vcpu_apicv_active(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_can_post_timer_interrupt);
+
+static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu)
+{
+ return kvm_can_post_timer_interrupt(vcpu) && vcpu->mode == IN_GUEST_MODE;
+}
+
static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
switch (map->mode) {
@@ -627,7 +635,7 @@ static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
{
u8 val;
if (pv_eoi_get_user(vcpu, &val) < 0)
- apic_debug("Can't read EOI MSR value: 0x%llx\n",
+ printk(KERN_WARNING "Can't read EOI MSR value: 0x%llx\n",
(unsigned long long)vcpu->arch.pv_eoi.msr_val);
return val & 0x1;
}
@@ -635,7 +643,7 @@ static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
{
if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
- apic_debug("Can't set EOI MSR value: 0x%llx\n",
+ printk(KERN_WARNING "Can't set EOI MSR value: 0x%llx\n",
(unsigned long long)vcpu->arch.pv_eoi.msr_val);
return;
}
@@ -645,7 +653,7 @@ static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
{
if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
- apic_debug("Can't clear EOI MSR value: 0x%llx\n",
+ printk(KERN_WARNING "Can't clear EOI MSR value: 0x%llx\n",
(unsigned long long)vcpu->arch.pv_eoi.msr_val);
return;
}
@@ -679,9 +687,6 @@ static bool __apic_update_ppr(struct kvm_lapic *apic, u32 *new_ppr)
else
ppr = isrv & 0xf0;
- apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x",
- apic, ppr, isr, isrv);
-
*new_ppr = ppr;
if (old_ppr != ppr)
kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr);
@@ -758,8 +763,6 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
return ((logical_id >> 4) == (mda >> 4))
&& (logical_id & mda & 0xf) != 0;
default:
- apic_debug("Bad DFR vcpu %d: %08x\n",
- apic->vcpu->vcpu_id, kvm_lapic_get_reg(apic, APIC_DFR));
return false;
}
}
@@ -798,10 +801,6 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
struct kvm_lapic *target = vcpu->arch.apic;
u32 mda = kvm_apic_mda(vcpu, dest, source, target);
- apic_debug("target %p, source %p, dest 0x%x, "
- "dest_mode 0x%x, short_hand 0x%x\n",
- target, source, dest, dest_mode, short_hand);
-
ASSERT(target);
switch (short_hand) {
case APIC_DEST_NOSHORT:
@@ -816,8 +815,6 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
case APIC_DEST_ALLBUT:
return target != source;
default:
- apic_debug("kvm: apic: Bad dest shorthand value %x\n",
- short_hand);
return false;
}
}
@@ -1095,15 +1092,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
smp_wmb();
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_vcpu_kick(vcpu);
- } else {
- apic_debug("Ignoring de-assert INIT to vcpu %d\n",
- vcpu->vcpu_id);
}
break;
case APIC_DM_STARTUP:
- apic_debug("SIPI to vcpu %d vector 0x%02x\n",
- vcpu->vcpu_id, vector);
result = 1;
apic->sipi_vector = vector;
/* make sure sipi_vector is visible for the receiver */
@@ -1221,14 +1213,6 @@ static void apic_send_ipi(struct kvm_lapic *apic)
trace_kvm_apic_ipi(icr_low, irq.dest_id);
- apic_debug("icr_high 0x%x, icr_low 0x%x, "
- "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
- "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x, "
- "msi_redir_hint 0x%x\n",
- icr_high, icr_low, irq.shorthand, irq.dest_id,
- irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
- irq.vector, irq.msi_redir_hint);
-
kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
}
@@ -1282,7 +1266,6 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
switch (offset) {
case APIC_ARBPRI:
- apic_debug("Access APIC ARBPRI register which is for P6\n");
break;
case APIC_TMCCT: /* Timer CCR */
@@ -1349,11 +1332,8 @@ int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
if (!apic_x2apic_mode(apic))
valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI);
- if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset))) {
- apic_debug("KVM_APIC_READ: read reserved register %x\n",
- offset);
+ if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset)))
return 1;
- }
result = __apic_read(apic, offset & ~0xf);
@@ -1411,9 +1391,6 @@ static void update_divide_count(struct kvm_lapic *apic)
tmp1 = tdcr & 0xf;
tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
apic->divide_count = 0x1 << (tmp2 & 0x7);
-
- apic_debug("timer divide count is 0x%x\n",
- apic->divide_count);
}
static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
@@ -1455,29 +1432,6 @@ static void apic_update_lvtt(struct kvm_lapic *apic)
}
}
-static void apic_timer_expired(struct kvm_lapic *apic)
-{
- struct kvm_vcpu *vcpu = apic->vcpu;
- struct swait_queue_head *q = &vcpu->wq;
- struct kvm_timer *ktimer = &apic->lapic_timer;
-
- if (atomic_read(&apic->lapic_timer.pending))
- return;
-
- atomic_inc(&apic->lapic_timer.pending);
- kvm_set_pending_timer(vcpu);
-
- /*
- * For x86, the atomic_inc() is serialized, thus
- * using swait_active() is safe.
- */
- if (swait_active(q))
- swake_up_one(q);
-
- if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
- ktimer->expired_tscdeadline = ktimer->tscdeadline;
-}
-
/*
* On APICv, this test will cause a busy wait
* during a higher-priority task.
@@ -1551,7 +1505,7 @@ static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
apic->lapic_timer.timer_advance_ns = timer_advance_ns;
}
-void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
+static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
u64 guest_tsc, tsc_deadline;
@@ -1559,9 +1513,6 @@ void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
if (apic->lapic_timer.expired_tscdeadline == 0)
return;
- if (!lapic_timer_int_injected(vcpu))
- return;
-
tsc_deadline = apic->lapic_timer.expired_tscdeadline;
apic->lapic_timer.expired_tscdeadline = 0;
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
@@ -1573,8 +1524,57 @@ void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
if (unlikely(!apic->lapic_timer.timer_advance_adjust_done))
adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
}
+
+void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
+{
+ if (lapic_timer_int_injected(vcpu))
+ __kvm_wait_lapic_expire(vcpu);
+}
EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
+static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic)
+{
+ struct kvm_timer *ktimer = &apic->lapic_timer;
+
+ kvm_apic_local_deliver(apic, APIC_LVTT);
+ if (apic_lvtt_tscdeadline(apic))
+ ktimer->tscdeadline = 0;
+ if (apic_lvtt_oneshot(apic)) {
+ ktimer->tscdeadline = 0;
+ ktimer->target_expiration = 0;
+ }
+}
+
+static void apic_timer_expired(struct kvm_lapic *apic)
+{
+ struct kvm_vcpu *vcpu = apic->vcpu;
+ struct swait_queue_head *q = &vcpu->wq;
+ struct kvm_timer *ktimer = &apic->lapic_timer;
+
+ if (atomic_read(&apic->lapic_timer.pending))
+ return;
+
+ if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
+ ktimer->expired_tscdeadline = ktimer->tscdeadline;
+
+ if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
+ if (apic->lapic_timer.timer_advance_ns)
+ __kvm_wait_lapic_expire(vcpu);
+ kvm_apic_inject_pending_timer_irqs(apic);
+ return;
+ }
+
+ atomic_inc(&apic->lapic_timer.pending);
+ kvm_set_pending_timer(vcpu);
+
+ /*
+ * For x86, the atomic_inc() is serialized, thus
+ * using swait_active() is safe.
+ */
+ if (swait_active(q))
+ swake_up_one(q);
+}
+
static void start_sw_tscdeadline(struct kvm_lapic *apic)
{
struct kvm_timer *ktimer = &apic->lapic_timer;
@@ -1601,7 +1601,7 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
likely(ns > apic->lapic_timer.timer_advance_ns)) {
expire = ktime_add_ns(now, ns);
expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
- hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_PINNED);
+ hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS);
} else
apic_timer_expired(apic);
@@ -1648,16 +1648,6 @@ static bool set_target_expiration(struct kvm_lapic *apic)
limit_periodic_timer_frequency(apic);
- apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
- PRIx64 ", "
- "timer initial count 0x%x, period %lldns, "
- "expire @ 0x%016" PRIx64 ".\n", __func__,
- APIC_BUS_CYCLE_NS, ktime_to_ns(now),
- kvm_lapic_get_reg(apic, APIC_TMICT),
- apic->lapic_timer.period,
- ktime_to_ns(ktime_add_ns(now,
- apic->lapic_timer.period)));
-
apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
apic->lapic_timer.target_expiration = ktime_add_ns(now, apic->lapic_timer.period);
@@ -1703,7 +1693,7 @@ static void start_sw_period(struct kvm_lapic *apic)
hrtimer_start(&apic->lapic_timer.timer,
apic->lapic_timer.target_expiration,
- HRTIMER_MODE_ABS_PINNED);
+ HRTIMER_MODE_ABS);
}
bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
@@ -1860,8 +1850,6 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) {
apic->lvt0_in_nmi_mode = lvt0_in_nmi_mode;
if (lvt0_in_nmi_mode) {
- apic_debug("Receive NMI setting on APIC_LVT0 "
- "for cpu %d\n", apic->vcpu->vcpu_id);
atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
} else
atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
@@ -1975,8 +1963,6 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
case APIC_TDCR: {
uint32_t old_divisor = apic->divide_count;
- if (val & 4)
- apic_debug("KVM_WRITE:TDCR %x\n", val);
kvm_lapic_set_reg(apic, APIC_TDCR, val);
update_divide_count(apic);
if (apic->divide_count != old_divisor &&
@@ -1988,10 +1974,8 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
break;
}
case APIC_ESR:
- if (apic_x2apic_mode(apic) && val != 0) {
- apic_debug("KVM_WRITE:ESR not zero %x\n", val);
+ if (apic_x2apic_mode(apic) && val != 0)
ret = 1;
- }
break;
case APIC_SELF_IPI:
@@ -2004,8 +1988,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
ret = 1;
break;
}
- if (ret)
- apic_debug("Local APIC Write to read-only register %x\n", reg);
+
return ret;
}
EXPORT_SYMBOL_GPL(kvm_lapic_reg_write);
@@ -2033,20 +2016,12 @@ static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
* 32/64/128 bits registers must be accessed thru 32 bits.
* Refer SDM 8.4.1
*/
- if (len != 4 || (offset & 0xf)) {
- /* Don't shout loud, $infamous_os would cause only noise. */
- apic_debug("apic write: bad size=%d %lx\n", len, (long)address);
+ if (len != 4 || (offset & 0xf))
return 0;
- }
val = *(u32*)data;
- /* too common printing */
- if (offset != APIC_EOI)
- apic_debug("%s: offset 0x%x with length 0x%x, and value is "
- "0x%x\n", __func__, offset, len, val);
-
- kvm_lapic_reg_write(apic, offset, val);
+ kvm_lapic_reg_write(apic, offset & 0xff0, val);
return 0;
}
@@ -2178,11 +2153,6 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
if ((value & MSR_IA32_APICBASE_ENABLE) &&
apic->base_address != APIC_DEFAULT_PHYS_BASE)
pr_warn_once("APIC base relocation is unsupported by KVM");
-
- /* with FSB delivery interrupt, we can restart APIC functionality */
- apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is "
- "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address);
-
}
void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
@@ -2193,8 +2163,6 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
if (!apic)
return;
- apic_debug("%s\n", __func__);
-
/* Stop the timer in case it's a reset to an active apic */
hrtimer_cancel(&apic->lapic_timer.timer);
@@ -2247,11 +2215,6 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
vcpu->arch.apic_arb_prio = 0;
vcpu->arch.apic_attention = 0;
-
- apic_debug("%s: vcpu=%p, id=0x%x, base_msr="
- "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
- vcpu, kvm_lapic_get_reg(apic, APIC_ID),
- vcpu->arch.apic_base, apic->base_address);
}
/*
@@ -2323,7 +2286,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
struct kvm_lapic *apic;
ASSERT(vcpu != NULL);
- apic_debug("apic_init %d\n", vcpu->vcpu_id);
apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT);
if (!apic)
@@ -2340,7 +2302,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
apic->vcpu = vcpu;
hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
- HRTIMER_MODE_ABS_PINNED);
+ HRTIMER_MODE_ABS);
apic->lapic_timer.timer.function = apic_timer_fn;
if (timer_advance_ns == -1) {
apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
@@ -2397,13 +2359,7 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
struct kvm_lapic *apic = vcpu->arch.apic;
if (atomic_read(&apic->lapic_timer.pending) > 0) {
- kvm_apic_local_deliver(apic, APIC_LVTT);
- if (apic_lvtt_tscdeadline(apic))
- apic->lapic_timer.tscdeadline = 0;
- if (apic_lvtt_oneshot(apic)) {
- apic->lapic_timer.tscdeadline = 0;
- apic->lapic_timer.target_expiration = 0;
- }
+ kvm_apic_inject_pending_timer_irqs(apic);
atomic_set(&apic->lapic_timer.pending, 0);
}
}
@@ -2525,12 +2481,13 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
{
struct hrtimer *timer;
- if (!lapic_in_kernel(vcpu))
+ if (!lapic_in_kernel(vcpu) ||
+ kvm_can_post_timer_interrupt(vcpu))
return;
timer = &vcpu->arch.apic->lapic_timer.timer;
if (hrtimer_cancel(timer))
- hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
+ hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
}
/*
@@ -2678,11 +2635,8 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
return 1;
- if (reg == APIC_DFR || reg == APIC_ICR2) {
- apic_debug("KVM_APIC_READ: read x2apic reserved register %x\n",
- reg);
+ if (reg == APIC_DFR || reg == APIC_ICR2)
return 1;
- }
if (kvm_lapic_reg_read(apic, reg, 4, &low))
return 1;
@@ -2780,8 +2734,6 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
/* evaluate pending_events before reading the vector */
smp_rmb();
sipi_vector = apic->sipi_vector;
- apic_debug("vcpu %d received sipi with vector # %x\n",
- vcpu->vcpu_id, sipi_vector);
kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
}
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 36747174e4a8..50053d2b8b7b 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -236,6 +236,7 @@ void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu);
void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu);
bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu);
void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu);
+bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu);
static inline enum lapic_mode kvm_apic_mode(u64 apic_base)
{
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9a5814d8d194..8f72526e2f68 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4597,11 +4597,11 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu,
*/
/* Faults from writes to non-writable pages */
- u8 wf = (pfec & PFERR_WRITE_MASK) ? ~w : 0;
+ u8 wf = (pfec & PFERR_WRITE_MASK) ? (u8)~w : 0;
/* Faults from user mode accesses to supervisor pages */
- u8 uf = (pfec & PFERR_USER_MASK) ? ~u : 0;
+ u8 uf = (pfec & PFERR_USER_MASK) ? (u8)~u : 0;
/* Faults from fetches of non-executable pages*/
- u8 ff = (pfec & PFERR_FETCH_MASK) ? ~x : 0;
+ u8 ff = (pfec & PFERR_FETCH_MASK) ? (u8)~x : 0;
/* Faults from kernel mode fetches of user pages */
u8 smepf = 0;
/* Faults from kernel mode accesses of user pages */
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index aa5a2597305a..46875bbd0419 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -19,8 +19,8 @@
#include "lapic.h"
#include "pmu.h"
-/* This keeps the total size of the filter under 4k. */
-#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 63
+/* This is enough to filter the vast majority of currently defined events. */
+#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300
/* NOTE:
* - Each perf counter is defined as "struct kvm_pmc";
@@ -131,8 +131,8 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
intr ? kvm_perf_overflow_intr :
kvm_perf_overflow, pmc);
if (IS_ERR(event)) {
- printk_once("kvm_pmu: event creation failed %ld\n",
- PTR_ERR(event));
+ pr_debug_ratelimited("kvm_pmu: event creation failed %ld for pmc->idx = %d\n",
+ PTR_ERR(event), pmc->idx);
return;
}
@@ -206,12 +206,24 @@ void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
{
unsigned en_field = ctrl & 0x3;
bool pmi = ctrl & 0x8;
+ struct kvm_pmu_event_filter *filter;
+ struct kvm *kvm = pmc->vcpu->kvm;
pmc_stop_counter(pmc);
if (!en_field || !pmc_is_enabled(pmc))
return;
+ filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
+ if (filter) {
+ if (filter->action == KVM_PMU_EVENT_DENY &&
+ test_bit(idx, (ulong *)&filter->fixed_counter_bitmap))
+ return;
+ if (filter->action == KVM_PMU_EVENT_ALLOW &&
+ !test_bit(idx, (ulong *)&filter->fixed_counter_bitmap))
+ return;
+ }
+
pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE,
kvm_x86_ops->pmu_ops->find_fixed_event(idx),
!(en_field & 0x2), /* exclude user */
@@ -385,6 +397,9 @@ int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
tmp.action != KVM_PMU_EVENT_DENY)
return -EINVAL;
+ if (tmp.flags != 0)
+ return -EINVAL;
+
if (tmp.nevents > KVM_PMU_EVENT_FILTER_MAX_EVENTS)
return -E2BIG;
@@ -406,8 +421,8 @@ int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
mutex_unlock(&kvm->lock);
synchronize_srcu_expedited(&kvm->srcu);
- r = 0;
+ r = 0;
cleanup:
kfree(filter);
- return r;
+ return r;
}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 583b9fa656f3..19f69df96758 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -7128,13 +7128,41 @@ static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
{
- bool is_user, smap;
-
- is_user = svm_get_cpl(vcpu) == 3;
- smap = !kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
+ unsigned long cr4 = kvm_read_cr4(vcpu);
+ bool smep = cr4 & X86_CR4_SMEP;
+ bool smap = cr4 & X86_CR4_SMAP;
+ bool is_user = svm_get_cpl(vcpu) == 3;
/*
- * Detect and workaround Errata 1096 Fam_17h_00_0Fh
+ * Detect and workaround Errata 1096 Fam_17h_00_0Fh.
+ *
+ * Errata:
+ * When CPU raise #NPF on guest data access and vCPU CR4.SMAP=1, it is
+ * possible that CPU microcode implementing DecodeAssist will fail
+ * to read bytes of instruction which caused #NPF. In this case,
+ * GuestIntrBytes field of the VMCB on a VMEXIT will incorrectly
+ * return 0 instead of the correct guest instruction bytes.
+ *
+ * This happens because CPU microcode reading instruction bytes
+ * uses a special opcode which attempts to read data using CPL=0
+ * priviledges. The microcode reads CS:RIP and if it hits a SMAP
+ * fault, it gives up and returns no instruction bytes.
+ *
+ * Detection:
+ * We reach here in case CPU supports DecodeAssist, raised #NPF and
+ * returned 0 in GuestIntrBytes field of the VMCB.
+ * First, errata can only be triggered in case vCPU CR4.SMAP=1.
+ * Second, if vCPU CR4.SMEP=1, errata could only be triggered
+ * in case vCPU CPL==3 (Because otherwise guest would have triggered
+ * a SMEP fault instead of #NPF).
+ * Otherwise, vCPU CR4.SMEP=0, errata could be triggered by any vCPU CPL.
+ * As most guests enable SMAP if they have also enabled SMEP, use above
+ * logic in order to attempt minimize false-positive of detecting errata
+ * while still preserving all cases semantic correctness.
+ *
+ * Workaround:
+ * To determine what instruction the guest was executing, the hypervisor
+ * will have to decode the instruction at the instruction pointer.
*
* In non SEV guest, hypervisor will be able to read the guest
* memory to decode the instruction pointer when insn_len is zero
@@ -7145,11 +7173,11 @@ static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
* instruction pointer so we will not able to workaround it. Lets
* print the error and request to kill the guest.
*/
- if (is_user && smap) {
+ if (smap && (!smep || is_user)) {
if (!sev_guest(vcpu->kvm))
return true;
- pr_err_ratelimited("KVM: Guest triggered AMD Erratum 1096\n");
+ pr_err_ratelimited("KVM: SEV Guest triggered AMD Erratum 1096\n");
kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
}
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index bb509c254939..0f1378789bd0 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -194,6 +194,7 @@ static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx)
{
secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_SHADOW_VMCS);
vmcs_write64(VMCS_LINK_POINTER, -1ull);
+ vmx->nested.need_vmcs12_to_shadow_sync = false;
}
static inline void nested_release_evmcs(struct kvm_vcpu *vcpu)
@@ -1341,6 +1342,9 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
unsigned long val;
int i;
+ if (WARN_ON(!shadow_vmcs))
+ return;
+
preempt_disable();
vmcs_load(shadow_vmcs);
@@ -1373,6 +1377,9 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
unsigned long val;
int i, q;
+ if (WARN_ON(!shadow_vmcs))
+ return;
+
vmcs_load(shadow_vmcs);
for (q = 0; q < ARRAY_SIZE(fields); q++) {
@@ -4194,7 +4201,10 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
* mode, e.g. a 32-bit address size can yield a 64-bit virtual
* address when using FS/GS with a non-zero base.
*/
- *ret = s.base + off;
+ if (seg_reg == VCPU_SREG_FS || seg_reg == VCPU_SREG_GS)
+ *ret = s.base + off;
+ else
+ *ret = off;
/* Long mode: #GP(0)/#SS(0) if the memory address is in a
* non-canonical form. This is the only check on the memory
@@ -4433,7 +4443,6 @@ static inline void nested_release_vmcs12(struct kvm_vcpu *vcpu)
/* copy to memory all shadowed fields in case
they were modified */
copy_shadow_to_vmcs12(vmx);
- vmx->nested.need_vmcs12_to_shadow_sync = false;
vmx_disable_shadow_vmcs(vmx);
}
vmx->nested.posted_intr_nv = -1;
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 68d231d49c7a..4dea0e0e7e39 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -337,17 +337,22 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
static void intel_pmu_reset(struct kvm_vcpu *vcpu)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+ struct kvm_pmc *pmc = NULL;
int i;
for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
- struct kvm_pmc *pmc = &pmu->gp_counters[i];
+ pmc = &pmu->gp_counters[i];
pmc_stop_counter(pmc);
pmc->counter = pmc->eventsel = 0;
}
- for (i = 0; i < INTEL_PMC_MAX_FIXED; i++)
- pmc_stop_counter(&pmu->fixed_counters[i]);
+ for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) {
+ pmc = &pmu->fixed_counters[i];
+
+ pmc_stop_counter(pmc);
+ pmc->counter = 0;
+ }
pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status =
pmu->global_ovf_ctrl = 0;
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index d4cb1945b2e3..4010d519eb8c 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -54,9 +54,9 @@ ENTRY(vmx_vmenter)
ret
3: cmpb $0, kvm_rebooting
- jne 4f
- call kvm_spurious_fault
-4: ret
+ je 4f
+ ret
+4: ud2
.pushsection .fixup, "ax"
5: jmp 3b
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 69536553446d..a279447eb75b 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5829,6 +5829,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
}
if (unlikely(vmx->fail)) {
+ dump_vmcs();
vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
vcpu->run->fail_entry.hardware_entry_failure_reason
= vmcs_read32(VM_INSTRUCTION_ERROR);
@@ -7064,7 +7065,8 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles;
struct kvm_timer *ktimer = &vcpu->arch.apic->lapic_timer;
- if (kvm_mwait_in_guest(vcpu->kvm))
+ if (kvm_mwait_in_guest(vcpu->kvm) ||
+ kvm_can_post_timer_interrupt(vcpu))
return -EOPNOTSUPP;
vmx = to_vmx(vcpu);
@@ -7453,7 +7455,7 @@ static int enable_smi_window(struct kvm_vcpu *vcpu)
static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
{
- return 0;
+ return false;
}
static __init int hardware_setup(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4a0b74ecd1de..58305cf81182 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -51,6 +51,7 @@
#include <linux/kvm_irqfd.h>
#include <linux/irqbypass.h>
#include <linux/sched/stat.h>
+#include <linux/sched/isolation.h>
#include <linux/mem_encrypt.h>
#include <trace/events/kvm.h>
@@ -153,6 +154,9 @@ EXPORT_SYMBOL_GPL(enable_vmware_backdoor);
static bool __read_mostly force_emulation_prefix = false;
module_param(force_emulation_prefix, bool, S_IRUGO);
+int __read_mostly pi_inject_timer = -1;
+module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR);
+
#define KVM_NR_SHARED_MSRS 16
struct kvm_shared_msrs_global {
@@ -1456,12 +1460,8 @@ static void update_pvclock_gtod(struct timekeeper *tk)
void kvm_set_pending_timer(struct kvm_vcpu *vcpu)
{
- /*
- * Note: KVM_REQ_PENDING_TIMER is implicitly checked in
- * vcpu_enter_guest. This function is only called from
- * the physical CPU that is running vcpu.
- */
kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+ kvm_vcpu_kick(vcpu);
}
static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
@@ -1540,9 +1540,6 @@ static void kvm_get_time_scale(uint64_t scaled_hz, uint64_t base_hz,
*pshift = shift;
*pmultiplier = div_frac(scaled64, tps32);
-
- pr_debug("%s: base_hz %llu => %llu, shift %d, mul %u\n",
- __func__, base_hz, scaled_hz, shift, *pmultiplier);
}
#ifdef CONFIG_X86_64
@@ -1785,12 +1782,10 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
if (!kvm_check_tsc_unstable()) {
offset = kvm->arch.cur_tsc_offset;
- pr_debug("kvm: matched tsc offset for %llu\n", data);
} else {
u64 delta = nsec_to_cycles(vcpu, elapsed);
data += delta;
offset = kvm_compute_tsc_offset(vcpu, data);
- pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
}
matched = true;
already_matched = (vcpu->arch.this_tsc_generation == kvm->arch.cur_tsc_generation);
@@ -1809,8 +1804,6 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
kvm->arch.cur_tsc_write = data;
kvm->arch.cur_tsc_offset = offset;
matched = false;
- pr_debug("kvm: new tsc generation %llu, clock %llu\n",
- kvm->arch.cur_tsc_generation, data);
}
/*
@@ -6911,7 +6904,6 @@ static void kvm_timer_init(void)
cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
}
- pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz);
cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "x86/kvm/clk:online",
kvmclock_cpu_online, kvmclock_cpu_down_prep);
@@ -7070,6 +7062,8 @@ int kvm_arch_init(void *opaque)
host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
kvm_lapic_init();
+ if (pi_inject_timer == -1)
+ pi_inject_timer = housekeeping_enabled(HK_FLAG_TIMER);
#ifdef CONFIG_X86_64
pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index e08a12892e8b..6594020c0691 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -301,6 +301,8 @@ extern unsigned int min_timer_period_us;
extern bool enable_vmware_backdoor;
+extern int pi_inject_timer;
+
extern struct static_key kvm_no_apic_vcpu;
static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 378a1f70ae7d..4fe1601dbc5d 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -239,7 +239,7 @@ copy_user_handle_tail:
ret
_ASM_EXTABLE_UA(1b, 2b)
-ENDPROC(copy_user_handle_tail)
+END(copy_user_handle_tail)
/*
* copy_user_nocache - Uncached memory copy with exception handling
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index 74fdff968ea3..304f958c27b2 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -115,29 +115,29 @@ ENDPROC(__get_user_8)
EXPORT_SYMBOL(__get_user_8)
+bad_get_user_clac:
+ ASM_CLAC
bad_get_user:
xor %edx,%edx
mov $(-EFAULT),%_ASM_AX
- ASM_CLAC
ret
-END(bad_get_user)
#ifdef CONFIG_X86_32
+bad_get_user_8_clac:
+ ASM_CLAC
bad_get_user_8:
xor %edx,%edx
xor %ecx,%ecx
mov $(-EFAULT),%_ASM_AX
- ASM_CLAC
ret
-END(bad_get_user_8)
#endif
- _ASM_EXTABLE_UA(1b, bad_get_user)
- _ASM_EXTABLE_UA(2b, bad_get_user)
- _ASM_EXTABLE_UA(3b, bad_get_user)
+ _ASM_EXTABLE_UA(1b, bad_get_user_clac)
+ _ASM_EXTABLE_UA(2b, bad_get_user_clac)
+ _ASM_EXTABLE_UA(3b, bad_get_user_clac)
#ifdef CONFIG_X86_64
- _ASM_EXTABLE_UA(4b, bad_get_user)
+ _ASM_EXTABLE_UA(4b, bad_get_user_clac)
#else
- _ASM_EXTABLE_UA(4b, bad_get_user_8)
- _ASM_EXTABLE_UA(5b, bad_get_user_8)
+ _ASM_EXTABLE_UA(4b, bad_get_user_8_clac)
+ _ASM_EXTABLE_UA(5b, bad_get_user_8_clac)
#endif
diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
index d2e5c9c39601..14bf78341d3c 100644
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -32,8 +32,6 @@
*/
#define ENTER mov PER_CPU_VAR(current_task), %_ASM_BX
-#define EXIT ASM_CLAC ; \
- ret
.text
ENTRY(__put_user_1)
@@ -43,7 +41,8 @@ ENTRY(__put_user_1)
ASM_STAC
1: movb %al,(%_ASM_CX)
xor %eax,%eax
- EXIT
+ ASM_CLAC
+ ret
ENDPROC(__put_user_1)
EXPORT_SYMBOL(__put_user_1)
@@ -56,7 +55,8 @@ ENTRY(__put_user_2)
ASM_STAC
2: movw %ax,(%_ASM_CX)
xor %eax,%eax
- EXIT
+ ASM_CLAC
+ ret
ENDPROC(__put_user_2)
EXPORT_SYMBOL(__put_user_2)
@@ -69,7 +69,8 @@ ENTRY(__put_user_4)
ASM_STAC
3: movl %eax,(%_ASM_CX)
xor %eax,%eax
- EXIT
+ ASM_CLAC
+ ret
ENDPROC(__put_user_4)
EXPORT_SYMBOL(__put_user_4)
@@ -85,19 +86,21 @@ ENTRY(__put_user_8)
5: movl %edx,4(%_ASM_CX)
#endif
xor %eax,%eax
- EXIT
+ ASM_CLAC
+ RET
ENDPROC(__put_user_8)
EXPORT_SYMBOL(__put_user_8)
+bad_put_user_clac:
+ ASM_CLAC
bad_put_user:
movl $-EFAULT,%eax
- EXIT
-END(bad_put_user)
+ RET
- _ASM_EXTABLE_UA(1b, bad_put_user)
- _ASM_EXTABLE_UA(2b, bad_put_user)
- _ASM_EXTABLE_UA(3b, bad_put_user)
- _ASM_EXTABLE_UA(4b, bad_put_user)
+ _ASM_EXTABLE_UA(1b, bad_put_user_clac)
+ _ASM_EXTABLE_UA(2b, bad_put_user_clac)
+ _ASM_EXTABLE_UA(3b, bad_put_user_clac)
+ _ASM_EXTABLE_UA(4b, bad_put_user_clac)
#ifdef CONFIG_X86_32
- _ASM_EXTABLE_UA(5b, bad_put_user)
+ _ASM_EXTABLE_UA(5b, bad_put_user_clac)
#endif
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index e0e006f1624e..fff28c6f73a2 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -60,7 +60,7 @@ EXPORT_SYMBOL(clear_user);
* but reuse __memcpy_mcsafe in case a new read error is encountered.
* clac() is handled in _copy_to_iter_mcsafe().
*/
-__visible unsigned long
+__visible notrace unsigned long
mcsafe_handle_tail(char *to, char *from, unsigned len)
{
for (; len; --len, to++, from++) {
diff --git a/arch/x86/math-emu/fpu_emu.h b/arch/x86/math-emu/fpu_emu.h
index a5a41ec58072..0c122226ca56 100644
--- a/arch/x86/math-emu/fpu_emu.h
+++ b/arch/x86/math-emu/fpu_emu.h
@@ -177,7 +177,7 @@ static inline void reg_copy(FPU_REG const *x, FPU_REG *y)
#define setexponentpos(x,y) { (*(short *)&((x)->exp)) = \
((y) + EXTENDED_Ebias) & 0x7fff; }
#define exponent16(x) (*(short *)&((x)->exp))
-#define setexponent16(x,y) { (*(short *)&((x)->exp)) = (y); }
+#define setexponent16(x,y) { (*(short *)&((x)->exp)) = (u16)(y); }
#define addexponent(x,y) { (*(short *)&((x)->exp)) += (y); }
#define stdexp(x) { (*(short *)&((x)->exp)) += EXTENDED_Ebias; }
diff --git a/arch/x86/math-emu/reg_constant.c b/arch/x86/math-emu/reg_constant.c
index 8dc9095bab22..742619e94bdf 100644
--- a/arch/x86/math-emu/reg_constant.c
+++ b/arch/x86/math-emu/reg_constant.c
@@ -18,7 +18,7 @@
#include "control_w.h"
#define MAKE_REG(s, e, l, h) { l, h, \
- ((EXTENDED_Ebias+(e)) | ((SIGN_##s != 0)*0x8000)) }
+ (u16)((EXTENDED_Ebias+(e)) | ((SIGN_##s != 0)*0x8000)) }
FPU_REG const CONST_1 = MAKE_REG(POS, 0, 0x00000000, 0x80000000);
#if 0
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index d1634c59ed56..6c46095cd0d9 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1490,9 +1490,8 @@ good_area:
NOKPROBE_SYMBOL(do_user_addr_fault);
/*
- * This routine handles page faults. It determines the address,
- * and the problem, and then passes it off to one of the appropriate
- * routines.
+ * Explicitly marked noinline such that the function tracer sees this as the
+ * page_fault entry point.
*/
static noinline void
__do_page_fault(struct pt_regs *regs, unsigned long hw_error_code,
@@ -1511,33 +1510,26 @@ __do_page_fault(struct pt_regs *regs, unsigned long hw_error_code,
}
NOKPROBE_SYMBOL(__do_page_fault);
-static nokprobe_inline void
-trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
- unsigned long error_code)
+static __always_inline void
+trace_page_fault_entries(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address)
{
+ if (!trace_pagefault_enabled())
+ return;
+
if (user_mode(regs))
trace_page_fault_user(address, regs, error_code);
else
trace_page_fault_kernel(address, regs, error_code);
}
-/*
- * We must have this function blacklisted from kprobes, tagged with notrace
- * and call read_cr2() before calling anything else. To avoid calling any
- * kind of tracing machinery before we've observed the CR2 value.
- *
- * exception_{enter,exit}() contains all sorts of tracepoints.
- */
-dotraplinkage void notrace
-do_page_fault(struct pt_regs *regs, unsigned long error_code)
+dotraplinkage void
+do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address)
{
- unsigned long address = read_cr2(); /* Get the faulting address */
enum ctx_state prev_state;
prev_state = exception_enter();
- if (trace_pagefault_enabled())
- trace_page_fault_entries(address, regs, error_code);
-
+ trace_page_fault_entries(regs, error_code, address);
__do_page_fault(regs, error_code, address);
exception_exit(prev_state);
}
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index e0df96fdfe46..fece30ca8b0c 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -15,6 +15,10 @@
#include <linux/dma-direct.h>
#include <linux/swiotlb.h>
#include <linux/mem_encrypt.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/dma-mapping.h>
#include <asm/tlbflush.h>
#include <asm/fixmap.h>
@@ -41,7 +45,7 @@ EXPORT_SYMBOL_GPL(sev_enable_key);
bool sev_enabled __section(.data);
/* Buffer used for early in-place encryption by BSP, no locking needed */
-static char sme_early_buffer[PAGE_SIZE] __aligned(PAGE_SIZE);
+static char sme_early_buffer[PAGE_SIZE] __initdata __aligned(PAGE_SIZE);
/*
* This routine does not change the underlying encryption setting of the
@@ -348,6 +352,32 @@ bool sev_active(void)
}
EXPORT_SYMBOL(sev_active);
+/* Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_UNENCRYPTED */
+bool force_dma_unencrypted(struct device *dev)
+{
+ /*
+ * For SEV, all DMA must be to unencrypted addresses.
+ */
+ if (sev_active())
+ return true;
+
+ /*
+ * For SME, all DMA must be to unencrypted addresses if the
+ * device does not support DMA to addresses that include the
+ * encryption mask.
+ */
+ if (sme_active()) {
+ u64 dma_enc_mask = DMA_BIT_MASK(__ffs64(sme_me_mask));
+ u64 dma_dev_mask = min_not_zero(dev->coherent_dma_mask,
+ dev->bus_dma_mask);
+
+ if (dma_dev_mask <= dma_enc_mask)
+ return true;
+ }
+
+ return false;
+}
+
/* Architecture __weak replacement functions */
void __init mem_encrypt_free_decrypted_mem(void)
{
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index bed6bb93c965..7ceb32821093 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -998,7 +998,8 @@ void __init xen_setup_vcpu_info_placement(void)
__PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
pv_ops.irq.irq_enable =
__PV_IS_CALLEE_SAVE(xen_irq_enable_direct);
- pv_ops.mmu.read_cr2 = xen_read_cr2_direct;
+ pv_ops.mmu.read_cr2 =
+ __PV_IS_CALLEE_SAVE(xen_read_cr2_direct);
}
}
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index f6e5eeecfc69..26e8b326966d 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1307,16 +1307,6 @@ static void xen_write_cr2(unsigned long cr2)
this_cpu_read(xen_vcpu)->arch.cr2 = cr2;
}
-static unsigned long xen_read_cr2(void)
-{
- return this_cpu_read(xen_vcpu)->arch.cr2;
-}
-
-unsigned long xen_read_cr2_direct(void)
-{
- return this_cpu_read(xen_vcpu_info.arch.cr2);
-}
-
static noinline void xen_flush_tlb(void)
{
struct mmuext_op *op;
@@ -2397,7 +2387,7 @@ static void xen_leave_lazy_mmu(void)
}
static const struct pv_mmu_ops xen_mmu_ops __initconst = {
- .read_cr2 = xen_read_cr2,
+ .read_cr2 = __PV_IS_CALLEE_SAVE(xen_read_cr2),
.write_cr2 = xen_write_cr2,
.read_cr3 = xen_read_cr3,
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 8019edd0125c..be104eef80be 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -10,6 +10,7 @@
#include <asm/percpu.h>
#include <asm/processor-flags.h>
#include <asm/frame.h>
+#include <asm/asm.h>
#include <linux/linkage.h>
@@ -135,3 +136,18 @@ ENTRY(check_events)
FRAME_END
ret
ENDPROC(check_events)
+
+ENTRY(xen_read_cr2)
+ FRAME_BEGIN
+ _ASM_MOV PER_CPU_VAR(xen_vcpu), %_ASM_AX
+ _ASM_MOV XEN_vcpu_info_arch_cr2(%_ASM_AX), %_ASM_AX
+ FRAME_END
+ ret
+ ENDPROC(xen_read_cr2);
+
+ENTRY(xen_read_cr2_direct)
+ FRAME_BEGIN
+ _ASM_MOV PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_arch_cr2, %_ASM_AX
+ FRAME_END
+ ret
+ ENDPROC(xen_read_cr2_direct);
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 2f111f47ba98..45a441c33d6d 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -134,6 +134,9 @@ __visible void xen_irq_disable_direct(void);
__visible unsigned long xen_save_fl_direct(void);
__visible void xen_restore_fl_direct(unsigned long);
+__visible unsigned long xen_read_cr2(void);
+__visible unsigned long xen_read_cr2_direct(void);
+
/* These are not functions, and cannot be called normally */
__visible void xen_iret(void);
__visible void xen_sysret32(void);
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index c7a3d75fb308..2e72fc5af157 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -611,6 +611,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
struct Scsi_Host *shost;
struct iser_conn *iser_conn = NULL;
struct ib_conn *ib_conn;
+ struct ib_device *ib_dev;
u32 max_fr_sectors;
shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0);
@@ -641,16 +642,19 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
}
ib_conn = &iser_conn->ib_conn;
+ ib_dev = ib_conn->device->ib_device;
if (ib_conn->pi_support) {
- u32 sig_caps = ib_conn->device->ib_device->attrs.sig_prot_cap;
+ u32 sig_caps = ib_dev->attrs.sig_prot_cap;
scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps));
scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP |
SHOST_DIX_GUARD_CRC);
}
- if (iscsi_host_add(shost,
- ib_conn->device->ib_device->dev.parent)) {
+ if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
+ shost->virt_boundary_mask = ~MASK_4K;
+
+ if (iscsi_host_add(shost, ib_dev->dev.parent)) {
mutex_unlock(&iser_conn->state_mutex);
goto free_host;
}
@@ -956,30 +960,6 @@ static umode_t iser_attr_is_visible(int param_type, int param)
return 0;
}
-static int iscsi_iser_slave_alloc(struct scsi_device *sdev)
-{
- struct iscsi_session *session;
- struct iser_conn *iser_conn;
- struct ib_device *ib_dev;
-
- mutex_lock(&unbind_iser_conn_mutex);
-
- session = starget_to_session(scsi_target(sdev))->dd_data;
- iser_conn = session->leadconn->dd_data;
- if (!iser_conn) {
- mutex_unlock(&unbind_iser_conn_mutex);
- return -ENOTCONN;
- }
- ib_dev = iser_conn->ib_conn.device->ib_device;
-
- if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
- blk_queue_virt_boundary(sdev->request_queue, ~MASK_4K);
-
- mutex_unlock(&unbind_iser_conn_mutex);
-
- return 0;
-}
-
static struct scsi_host_template iscsi_iser_sht = {
.module = THIS_MODULE,
.name = "iSCSI Initiator over iSER",
@@ -992,7 +972,6 @@ static struct scsi_host_template iscsi_iser_sht = {
.eh_device_reset_handler= iscsi_eh_device_reset,
.eh_target_reset_handler = iscsi_eh_recover_target,
.target_alloc = iscsi_target_alloc,
- .slave_alloc = iscsi_iser_slave_alloc,
.proc_name = "iscsi_iser",
.this_id = -1,
.track_queue_depth = 1,
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index c7bd96edce80..b5960351bec0 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -3046,20 +3046,6 @@ static int srp_target_alloc(struct scsi_target *starget)
return 0;
}
-static int srp_slave_alloc(struct scsi_device *sdev)
-{
- struct Scsi_Host *shost = sdev->host;
- struct srp_target_port *target = host_to_target(shost);
- struct srp_device *srp_dev = target->srp_host->srp_dev;
- struct ib_device *ibdev = srp_dev->dev;
-
- if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
- blk_queue_virt_boundary(sdev->request_queue,
- ~srp_dev->mr_page_mask);
-
- return 0;
-}
-
static int srp_slave_configure(struct scsi_device *sdev)
{
struct Scsi_Host *shost = sdev->host;
@@ -3262,7 +3248,6 @@ static struct scsi_host_template srp_template = {
.name = "InfiniBand SRP initiator",
.proc_name = DRV_NAME,
.target_alloc = srp_target_alloc,
- .slave_alloc = srp_slave_alloc,
.slave_configure = srp_slave_configure,
.info = srp_target_info,
.queuecommand = srp_queuecommand,
@@ -3806,6 +3791,9 @@ static ssize_t srp_create_target(struct device *dev,
target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
target_host->max_segment_size = ib_dma_max_seg_size(ibdev);
+ if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
+ target_host->virt_boundary_mask = ~srp_dev->mr_page_mask;
+
target = host_to_target(target_host);
target->net = kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
diff --git a/drivers/input/joystick/iforce/iforce-ff.c b/drivers/input/joystick/iforce/iforce-ff.c
index 4cadebd8b9c4..95c0348843e6 100644
--- a/drivers/input/joystick/iforce/iforce-ff.c
+++ b/drivers/input/joystick/iforce/iforce-ff.c
@@ -6,9 +6,6 @@
* USB/RS232 I-Force joysticks and wheels.
*/
-/*
- */
-
#include "iforce.h"
/*
diff --git a/drivers/input/joystick/iforce/iforce-main.c b/drivers/input/joystick/iforce/iforce-main.c
index 9a5f90da06ec..b2a68bc9f0b4 100644
--- a/drivers/input/joystick/iforce/iforce-main.c
+++ b/drivers/input/joystick/iforce/iforce-main.c
@@ -6,9 +6,6 @@
* USB/RS232 I-Force joysticks and wheels.
*/
-/*
- */
-
#include <asm/unaligned.h>
#include "iforce.h"
diff --git a/drivers/input/joystick/iforce/iforce-packets.c b/drivers/input/joystick/iforce/iforce-packets.c
index b313e38b2c3a..763642c8cee9 100644
--- a/drivers/input/joystick/iforce/iforce-packets.c
+++ b/drivers/input/joystick/iforce/iforce-packets.c
@@ -6,9 +6,6 @@
* USB/RS232 I-Force joysticks and wheels.
*/
-/*
- */
-
#include <asm/unaligned.h>
#include "iforce.h"
diff --git a/drivers/input/joystick/iforce/iforce-serio.c b/drivers/input/joystick/iforce/iforce-serio.c
index bbe31e0b759f..f95a81b9fac7 100644
--- a/drivers/input/joystick/iforce/iforce-serio.c
+++ b/drivers/input/joystick/iforce/iforce-serio.c
@@ -6,9 +6,6 @@
* USB/RS232 I-Force joysticks and wheels.
*/
-/*
- */
-
#include <linux/serio.h>
#include "iforce.h"
diff --git a/drivers/input/joystick/iforce/iforce-usb.c b/drivers/input/joystick/iforce/iforce-usb.c
index ade376bfb79f..29abfeeef9a5 100644
--- a/drivers/input/joystick/iforce/iforce-usb.c
+++ b/drivers/input/joystick/iforce/iforce-usb.c
@@ -6,9 +6,6 @@
* USB/RS232 I-Force joysticks and wheels.
*/
-/*
- */
-
#include <linux/usb.h>
#include "iforce.h"
diff --git a/drivers/input/joystick/iforce/iforce.h b/drivers/input/joystick/iforce/iforce.h
index 9cfa460466aa..6aa761ebbdf7 100644
--- a/drivers/input/joystick/iforce/iforce.h
+++ b/drivers/input/joystick/iforce/iforce.h
@@ -6,9 +6,6 @@
* USB/RS232 I-Force joysticks and wheels.
*/
-/*
- */
-
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/input.h>
diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig
index 7c4f19dab34f..8e9c3ea9d5e7 100644
--- a/drivers/input/keyboard/Kconfig
+++ b/drivers/input/keyboard/Kconfig
@@ -71,6 +71,22 @@ config KEYBOARD_AMIGA
config ATARI_KBD_CORE
bool
+config KEYBOARD_APPLESPI
+ tristate "Apple SPI keyboard and trackpad"
+ depends on ACPI && EFI
+ depends on SPI
+ depends on X86 || COMPILE_TEST
+ help
+ Say Y here if you are running Linux on any Apple MacBook8,1 or later,
+ or any MacBookPro13,* or MacBookPro14,*.
+
+ You will also need to enable appropriate SPI master controllers:
+ spi_pxa2xx_platform and spi_pxa2xx_pci for MacBook8,1, and
+ spi_pxa2xx_platform and intel_lpss_pci for the rest.
+
+ To compile this driver as a module, choose M here: the
+ module will be called applespi.
+
config KEYBOARD_ATARI
tristate "Atari keyboard"
depends on ATARI
diff --git a/drivers/input/keyboard/Makefile b/drivers/input/keyboard/Makefile
index f0291ca39f62..06a0af6efeae 100644
--- a/drivers/input/keyboard/Makefile
+++ b/drivers/input/keyboard/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_KEYBOARD_ADP5520) += adp5520-keys.o
obj-$(CONFIG_KEYBOARD_ADP5588) += adp5588-keys.o
obj-$(CONFIG_KEYBOARD_ADP5589) += adp5589-keys.o
obj-$(CONFIG_KEYBOARD_AMIGA) += amikbd.o
+obj-$(CONFIG_KEYBOARD_APPLESPI) += applespi.o
obj-$(CONFIG_KEYBOARD_ATARI) += atakbd.o
obj-$(CONFIG_KEYBOARD_ATKBD) += atkbd.o
obj-$(CONFIG_KEYBOARD_BCM) += bcm-keypad.o
diff --git a/drivers/input/keyboard/adp5589-keys.c b/drivers/input/keyboard/adp5589-keys.c
index 4c05c70a8cf3..4f96a4a99e5b 100644
--- a/drivers/input/keyboard/adp5589-keys.c
+++ b/drivers/input/keyboard/adp5589-keys.c
@@ -505,6 +505,7 @@ static int adp5589_gpio_add(struct adp5589_kpad *kpad)
if (!gpio_data)
return 0;
+ kpad->gc.parent = dev;
kpad->gc.ngpio = adp5589_build_gpiomap(kpad, pdata);
if (kpad->gc.ngpio == 0) {
dev_info(dev, "No unused gpios left to export\n");
diff --git a/drivers/input/keyboard/applespi.c b/drivers/input/keyboard/applespi.c
new file mode 100644
index 000000000000..548737e7aeda
--- /dev/null
+++ b/drivers/input/keyboard/applespi.c
@@ -0,0 +1,1977 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * MacBook (Pro) SPI keyboard and touchpad driver
+ *
+ * Copyright (c) 2015-2018 Federico Lorenzi
+ * Copyright (c) 2017-2018 Ronald Tschalär
+ */
+
+/*
+ * The keyboard and touchpad controller on the MacBookAir6, MacBookPro12,
+ * MacBook8 and newer can be driven either by USB or SPI. However the USB
+ * pins are only connected on the MacBookAir6 and 7 and the MacBookPro12.
+ * All others need this driver. The interface is selected using ACPI methods:
+ *
+ * * UIEN ("USB Interface Enable"): If invoked with argument 1, disables SPI
+ * and enables USB. If invoked with argument 0, disables USB.
+ * * UIST ("USB Interface Status"): Returns 1 if USB is enabled, 0 otherwise.
+ * * SIEN ("SPI Interface Enable"): If invoked with argument 1, disables USB
+ * and enables SPI. If invoked with argument 0, disables SPI.
+ * * SIST ("SPI Interface Status"): Returns 1 if SPI is enabled, 0 otherwise.
+ * * ISOL: Resets the four GPIO pins used for SPI. Intended to be invoked with
+ * argument 1, then once more with argument 0.
+ *
+ * UIEN and UIST are only provided on models where the USB pins are connected.
+ *
+ * SPI-based Protocol
+ * ------------------
+ *
+ * The device and driver exchange messages (struct message); each message is
+ * encapsulated in one or more packets (struct spi_packet). There are two types
+ * of exchanges: reads, and writes. A read is signaled by a GPE, upon which one
+ * message can be read from the device. A write exchange consists of writing a
+ * command message, immediately reading a short status packet, and then, upon
+ * receiving a GPE, reading the response message. Write exchanges cannot be
+ * interleaved, i.e. a new write exchange must not be started till the previous
+ * write exchange is complete. Whether a received message is part of a read or
+ * write exchange is indicated in the encapsulating packet's flags field.
+ *
+ * A single message may be too large to fit in a single packet (which has a
+ * fixed, 256-byte size). In that case it will be split over multiple,
+ * consecutive packets.
+ */
+
+#include <linux/acpi.h>
+#include <linux/crc16.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/efi.h>
+#include <linux/input.h>
+#include <linux/input/mt.h>
+#include <linux/leds.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/spi/spi.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+
+#include <asm/barrier.h>
+#include <asm/unaligned.h>
+
+#define CREATE_TRACE_POINTS
+#include "applespi.h"
+#include "applespi_trace.h"
+
+#define APPLESPI_PACKET_SIZE 256
+#define APPLESPI_STATUS_SIZE 4
+
+#define PACKET_TYPE_READ 0x20
+#define PACKET_TYPE_WRITE 0x40
+#define PACKET_DEV_KEYB 0x01
+#define PACKET_DEV_TPAD 0x02
+#define PACKET_DEV_INFO 0xd0
+
+#define MAX_ROLLOVER 6
+
+#define MAX_FINGERS 11
+#define MAX_FINGER_ORIENTATION 16384
+#define MAX_PKTS_PER_MSG 2
+
+#define KBD_BL_LEVEL_MIN 32U
+#define KBD_BL_LEVEL_MAX 255U
+#define KBD_BL_LEVEL_SCALE 1000000U
+#define KBD_BL_LEVEL_ADJ \
+ ((KBD_BL_LEVEL_MAX - KBD_BL_LEVEL_MIN) * KBD_BL_LEVEL_SCALE / 255U)
+
+#define EFI_BL_LEVEL_NAME L"KeyboardBacklightLevel"
+#define EFI_BL_LEVEL_GUID EFI_GUID(0xa076d2af, 0x9678, 0x4386, 0x8b, 0x58, 0x1f, 0xc8, 0xef, 0x04, 0x16, 0x19)
+
+#define APPLE_FLAG_FKEY 0x01
+
+#define SPI_RW_CHG_DELAY_US 100 /* from experimentation, in µs */
+
+#define SYNAPTICS_VENDOR_ID 0x06cb
+
+static unsigned int fnmode = 1;
+module_param(fnmode, uint, 0644);
+MODULE_PARM_DESC(fnmode, "Mode of Fn key on Apple keyboards (0 = disabled, [1] = fkeyslast, 2 = fkeysfirst)");
+
+static unsigned int fnremap;
+module_param(fnremap, uint, 0644);
+MODULE_PARM_DESC(fnremap, "Remap Fn key ([0] = no-remap; 1 = left-ctrl, 2 = left-shift, 3 = left-alt, 4 = left-meta, 6 = right-shift, 7 = right-alt, 8 = right-meta)");
+
+static bool iso_layout;
+module_param(iso_layout, bool, 0644);
+MODULE_PARM_DESC(iso_layout, "Enable/Disable hardcoded ISO-layout of the keyboard. ([0] = disabled, 1 = enabled)");
+
+static char touchpad_dimensions[40];
+module_param_string(touchpad_dimensions, touchpad_dimensions,
+ sizeof(touchpad_dimensions), 0444);
+MODULE_PARM_DESC(touchpad_dimensions, "The pixel dimensions of the touchpad, as XxY+W+H .");
+
+/**
+ * struct keyboard_protocol - keyboard message.
+ * message.type = 0x0110, message.length = 0x000a
+ *
+ * @unknown1: unknown
+ * @modifiers: bit-set of modifier/control keys pressed
+ * @unknown2: unknown
+ * @keys_pressed: the (non-modifier) keys currently pressed
+ * @fn_pressed: whether the fn key is currently pressed
+ * @crc16: crc over the whole message struct (message header +
+ * this struct) minus this @crc16 field
+ */
+struct keyboard_protocol {
+ u8 unknown1;
+ u8 modifiers;
+ u8 unknown2;
+ u8 keys_pressed[MAX_ROLLOVER];
+ u8 fn_pressed;
+ __le16 crc16;
+};
+
+/**
+ * struct tp_finger - single trackpad finger structure, le16-aligned
+ *
+ * @origin: zero when switching track finger
+ * @abs_x: absolute x coodinate
+ * @abs_y: absolute y coodinate
+ * @rel_x: relative x coodinate
+ * @rel_y: relative y coodinate
+ * @tool_major: tool area, major axis
+ * @tool_minor: tool area, minor axis
+ * @orientation: 16384 when point, else 15 bit angle
+ * @touch_major: touch area, major axis
+ * @touch_minor: touch area, minor axis
+ * @unused: zeros
+ * @pressure: pressure on forcetouch touchpad
+ * @multi: one finger: varies, more fingers: constant
+ * @crc16: on last finger: crc over the whole message struct
+ * (i.e. message header + this struct) minus the last
+ * @crc16 field; unknown on all other fingers.
+ */
+struct tp_finger {
+ __le16 origin;
+ __le16 abs_x;
+ __le16 abs_y;
+ __le16 rel_x;
+ __le16 rel_y;
+ __le16 tool_major;
+ __le16 tool_minor;
+ __le16 orientation;
+ __le16 touch_major;
+ __le16 touch_minor;
+ __le16 unused[2];
+ __le16 pressure;
+ __le16 multi;
+ __le16 crc16;
+};
+
+/**
+ * struct touchpad_protocol - touchpad message.
+ * message.type = 0x0210
+ *
+ * @unknown1: unknown
+ * @clicked: 1 if a button-click was detected, 0 otherwise
+ * @unknown2: unknown
+ * @number_of_fingers: the number of fingers being reported in @fingers
+ * @clicked2: same as @clicked
+ * @unknown3: unknown
+ * @fingers: the data for each finger
+ */
+struct touchpad_protocol {
+ u8 unknown1[1];
+ u8 clicked;
+ u8 unknown2[28];
+ u8 number_of_fingers;
+ u8 clicked2;
+ u8 unknown3[16];
+ struct tp_finger fingers[0];
+};
+
+/**
+ * struct command_protocol_tp_info - get touchpad info.
+ * message.type = 0x1020, message.length = 0x0000
+ *
+ * @crc16: crc over the whole message struct (message header +
+ * this struct) minus this @crc16 field
+ */
+struct command_protocol_tp_info {
+ __le16 crc16;
+};
+
+/**
+ * struct touchpad_info - touchpad info response.
+ * message.type = 0x1020, message.length = 0x006e
+ *
+ * @unknown1: unknown
+ * @model_flags: flags (vary by model number, but significance otherwise
+ * unknown)
+ * @model_no: the touchpad model number
+ * @unknown2: unknown
+ * @crc16: crc over the whole message struct (message header +
+ * this struct) minus this @crc16 field
+ */
+struct touchpad_info_protocol {
+ u8 unknown1[105];
+ u8 model_flags;
+ u8 model_no;
+ u8 unknown2[3];
+ __le16 crc16;
+};
+
+/**
+ * struct command_protocol_mt_init - initialize multitouch.
+ * message.type = 0x0252, message.length = 0x0002
+ *
+ * @cmd: value: 0x0102
+ * @crc16: crc over the whole message struct (message header +
+ * this struct) minus this @crc16 field
+ */
+struct command_protocol_mt_init {
+ __le16 cmd;
+ __le16 crc16;
+};
+
+/**
+ * struct command_protocol_capsl - toggle caps-lock led
+ * message.type = 0x0151, message.length = 0x0002
+ *
+ * @unknown: value: 0x01 (length?)
+ * @led: 0 off, 2 on
+ * @crc16: crc over the whole message struct (message header +
+ * this struct) minus this @crc16 field
+ */
+struct command_protocol_capsl {
+ u8 unknown;
+ u8 led;
+ __le16 crc16;
+};
+
+/**
+ * struct command_protocol_bl - set keyboard backlight brightness
+ * message.type = 0xB051, message.length = 0x0006
+ *
+ * @const1: value: 0x01B0
+ * @level: the brightness level to set
+ * @const2: value: 0x0001 (backlight off), 0x01F4 (backlight on)
+ * @crc16: crc over the whole message struct (message header +
+ * this struct) minus this @crc16 field
+ */
+struct command_protocol_bl {
+ __le16 const1;
+ __le16 level;
+ __le16 const2;
+ __le16 crc16;
+};
+
+/**
+ * struct message - a complete spi message.
+ *
+ * Each message begins with fixed header, followed by a message-type specific
+ * payload, and ends with a 16-bit crc. Because of the varying lengths of the
+ * payload, the crc is defined at the end of each payload struct, rather than
+ * in this struct.
+ *
+ * @type: the message type
+ * @zero: always 0
+ * @counter: incremented on each message, rolls over after 255; there is a
+ * separate counter for each message type.
+ * @rsp_buf_len:response buffer length (the exact nature of this field is quite
+ * speculative). On a request/write this is often the same as
+ * @length, though in some cases it has been seen to be much larger
+ * (e.g. 0x400); on a response/read this the same as on the
+ * request; for reads that are not responses it is 0.
+ * @length: length of the remainder of the data in the whole message
+ * structure (after re-assembly in case of being split over
+ * multiple spi-packets), minus the trailing crc. The total size
+ * of the message struct is therefore @length + 10.
+ */
+struct message {
+ __le16 type;
+ u8 zero;
+ u8 counter;
+ __le16 rsp_buf_len;
+ __le16 length;
+ union {
+ struct keyboard_protocol keyboard;
+ struct touchpad_protocol touchpad;
+ struct touchpad_info_protocol tp_info;
+ struct command_protocol_tp_info tp_info_command;
+ struct command_protocol_mt_init init_mt_command;
+ struct command_protocol_capsl capsl_command;
+ struct command_protocol_bl bl_command;
+ u8 data[0];
+ };
+};
+
+/* type + zero + counter + rsp_buf_len + length */
+#define MSG_HEADER_SIZE 8
+
+/**
+ * struct spi_packet - a complete spi packet; always 256 bytes. This carries
+ * the (parts of the) message in the data. But note that this does not
+ * necessarily contain a complete message, as in some cases (e.g. many
+ * fingers pressed) the message is split over multiple packets (see the
+ * @offset, @remaining, and @length fields). In general the data parts in
+ * spi_packet's are concatenated until @remaining is 0, and the result is an
+ * message.
+ *
+ * @flags: 0x40 = write (to device), 0x20 = read (from device); note that
+ * the response to a write still has 0x40.
+ * @device: 1 = keyboard, 2 = touchpad
+ * @offset: specifies the offset of this packet's data in the complete
+ * message; i.e. > 0 indicates this is a continuation packet (in
+ * the second packet for a message split over multiple packets
+ * this would then be the same as the @length in the first packet)
+ * @remaining: number of message bytes remaining in subsequents packets (in
+ * the first packet of a message split over two packets this would
+ * then be the same as the @length in the second packet)
+ * @length: length of the valid data in the @data in this packet
+ * @data: all or part of a message
+ * @crc16: crc over this whole structure minus this @crc16 field. This
+ * covers just this packet, even on multi-packet messages (in
+ * contrast to the crc in the message).
+ */
+struct spi_packet {
+ u8 flags;
+ u8 device;
+ __le16 offset;
+ __le16 remaining;
+ __le16 length;
+ u8 data[246];
+ __le16 crc16;
+};
+
+struct spi_settings {
+ u64 spi_cs_delay; /* cs-to-clk delay in us */
+ u64 reset_a2r_usec; /* active-to-receive delay? */
+ u64 reset_rec_usec; /* ? (cur val: 10) */
+};
+
+/* this mimics struct drm_rect */
+struct applespi_tp_info {
+ int x_min;
+ int y_min;
+ int x_max;
+ int y_max;
+};
+
+struct applespi_data {
+ struct spi_device *spi;
+ struct spi_settings spi_settings;
+ struct input_dev *keyboard_input_dev;
+ struct input_dev *touchpad_input_dev;
+
+ u8 *tx_buffer;
+ u8 *tx_status;
+ u8 *rx_buffer;
+
+ u8 *msg_buf;
+ unsigned int saved_msg_len;
+
+ struct applespi_tp_info tp_info;
+
+ u8 last_keys_pressed[MAX_ROLLOVER];
+ u8 last_keys_fn_pressed[MAX_ROLLOVER];
+ u8 last_fn_pressed;
+ struct input_mt_pos pos[MAX_FINGERS];
+ int slots[MAX_FINGERS];
+ int gpe;
+ acpi_handle sien;
+ acpi_handle sist;
+
+ struct spi_transfer dl_t;
+ struct spi_transfer rd_t;
+ struct spi_message rd_m;
+
+ struct spi_transfer ww_t;
+ struct spi_transfer wd_t;
+ struct spi_transfer wr_t;
+ struct spi_transfer st_t;
+ struct spi_message wr_m;
+
+ bool want_tp_info_cmd;
+ bool want_mt_init_cmd;
+ bool want_cl_led_on;
+ bool have_cl_led_on;
+ unsigned int want_bl_level;
+ unsigned int have_bl_level;
+ unsigned int cmd_msg_cntr;
+ /* lock to protect the above parameters and flags below */
+ spinlock_t cmd_msg_lock;
+ bool cmd_msg_queued;
+ enum applespi_evt_type cmd_evt_type;
+
+ struct led_classdev backlight_info;
+
+ bool suspended;
+ bool drain;
+ wait_queue_head_t drain_complete;
+ bool read_active;
+ bool write_active;
+
+ struct work_struct work;
+ struct touchpad_info_protocol rcvd_tp_info;
+
+ struct dentry *debugfs_root;
+ bool debug_tp_dim;
+ char tp_dim_val[40];
+ int tp_dim_min_x;
+ int tp_dim_max_x;
+ int tp_dim_min_y;
+ int tp_dim_max_y;
+};
+
+static const unsigned char applespi_scancodes[] = {
+ 0, 0, 0, 0,
+ KEY_A, KEY_B, KEY_C, KEY_D, KEY_E, KEY_F, KEY_G, KEY_H, KEY_I, KEY_J,
+ KEY_K, KEY_L, KEY_M, KEY_N, KEY_O, KEY_P, KEY_Q, KEY_R, KEY_S, KEY_T,
+ KEY_U, KEY_V, KEY_W, KEY_X, KEY_Y, KEY_Z,
+ KEY_1, KEY_2, KEY_3, KEY_4, KEY_5, KEY_6, KEY_7, KEY_8, KEY_9, KEY_0,
+ KEY_ENTER, KEY_ESC, KEY_BACKSPACE, KEY_TAB, KEY_SPACE, KEY_MINUS,
+ KEY_EQUAL, KEY_LEFTBRACE, KEY_RIGHTBRACE, KEY_BACKSLASH, 0,
+ KEY_SEMICOLON, KEY_APOSTROPHE, KEY_GRAVE, KEY_COMMA, KEY_DOT, KEY_SLASH,
+ KEY_CAPSLOCK,
+ KEY_F1, KEY_F2, KEY_F3, KEY_F4, KEY_F5, KEY_F6, KEY_F7, KEY_F8, KEY_F9,
+ KEY_F10, KEY_F11, KEY_F12, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ KEY_RIGHT, KEY_LEFT, KEY_DOWN, KEY_UP,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, KEY_102ND,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, KEY_RO, 0, KEY_YEN, 0, 0, 0, 0, 0,
+ 0, KEY_KATAKANAHIRAGANA, KEY_MUHENKAN
+};
+
+/*
+ * This must have exactly as many entries as there are bits in
+ * struct keyboard_protocol.modifiers .
+ */
+static const unsigned char applespi_controlcodes[] = {
+ KEY_LEFTCTRL,
+ KEY_LEFTSHIFT,
+ KEY_LEFTALT,
+ KEY_LEFTMETA,
+ 0,
+ KEY_RIGHTSHIFT,
+ KEY_RIGHTALT,
+ KEY_RIGHTMETA
+};
+
+struct applespi_key_translation {
+ u16 from;
+ u16 to;
+ u8 flags;
+};
+
+static const struct applespi_key_translation applespi_fn_codes[] = {
+ { KEY_BACKSPACE, KEY_DELETE },
+ { KEY_ENTER, KEY_INSERT },
+ { KEY_F1, KEY_BRIGHTNESSDOWN, APPLE_FLAG_FKEY },
+ { KEY_F2, KEY_BRIGHTNESSUP, APPLE_FLAG_FKEY },
+ { KEY_F3, KEY_SCALE, APPLE_FLAG_FKEY },
+ { KEY_F4, KEY_DASHBOARD, APPLE_FLAG_FKEY },
+ { KEY_F5, KEY_KBDILLUMDOWN, APPLE_FLAG_FKEY },
+ { KEY_F6, KEY_KBDILLUMUP, APPLE_FLAG_FKEY },
+ { KEY_F7, KEY_PREVIOUSSONG, APPLE_FLAG_FKEY },
+ { KEY_F8, KEY_PLAYPAUSE, APPLE_FLAG_FKEY },
+ { KEY_F9, KEY_NEXTSONG, APPLE_FLAG_FKEY },
+ { KEY_F10, KEY_MUTE, APPLE_FLAG_FKEY },
+ { KEY_F11, KEY_VOLUMEDOWN, APPLE_FLAG_FKEY },
+ { KEY_F12, KEY_VOLUMEUP, APPLE_FLAG_FKEY },
+ { KEY_RIGHT, KEY_END },
+ { KEY_LEFT, KEY_HOME },
+ { KEY_DOWN, KEY_PAGEDOWN },
+ { KEY_UP, KEY_PAGEUP },
+ { }
+};
+
+static const struct applespi_key_translation apple_iso_keyboard[] = {
+ { KEY_GRAVE, KEY_102ND },
+ { KEY_102ND, KEY_GRAVE },
+ { }
+};
+
+struct applespi_tp_model_info {
+ u16 model;
+ struct applespi_tp_info tp_info;
+};
+
+static const struct applespi_tp_model_info applespi_tp_models[] = {
+ {
+ .model = 0x04, /* MB8 MB9 MB10 */
+ .tp_info = { -5087, -182, 5579, 6089 },
+ },
+ {
+ .model = 0x05, /* MBP13,1 MBP13,2 MBP14,1 MBP14,2 */
+ .tp_info = { -6243, -170, 6749, 7685 },
+ },
+ {
+ .model = 0x06, /* MBP13,3 MBP14,3 */
+ .tp_info = { -7456, -163, 7976, 9283 },
+ },
+ {}
+};
+
+typedef void (*applespi_trace_fun)(enum applespi_evt_type,
+ enum applespi_pkt_type, u8 *, size_t);
+
+static applespi_trace_fun applespi_get_trace_fun(enum applespi_evt_type type)
+{
+ switch (type) {
+ case ET_CMD_TP_INI:
+ return trace_applespi_tp_ini_cmd;
+ case ET_CMD_BL:
+ return trace_applespi_backlight_cmd;
+ case ET_CMD_CL:
+ return trace_applespi_caps_lock_cmd;
+ case ET_RD_KEYB:
+ return trace_applespi_keyboard_data;
+ case ET_RD_TPAD:
+ return trace_applespi_touchpad_data;
+ case ET_RD_UNKN:
+ return trace_applespi_unknown_data;
+ default:
+ WARN_ONCE(1, "Unknown msg type %d", type);
+ return trace_applespi_unknown_data;
+ }
+}
+
+static void applespi_setup_read_txfrs(struct applespi_data *applespi)
+{
+ struct spi_message *msg = &applespi->rd_m;
+ struct spi_transfer *dl_t = &applespi->dl_t;
+ struct spi_transfer *rd_t = &applespi->rd_t;
+
+ memset(dl_t, 0, sizeof(*dl_t));
+ memset(rd_t, 0, sizeof(*rd_t));
+
+ dl_t->delay_usecs = applespi->spi_settings.spi_cs_delay;
+
+ rd_t->rx_buf = applespi->rx_buffer;
+ rd_t->len = APPLESPI_PACKET_SIZE;
+
+ spi_message_init(msg);
+ spi_message_add_tail(dl_t, msg);
+ spi_message_add_tail(rd_t, msg);
+}
+
+static void applespi_setup_write_txfrs(struct applespi_data *applespi)
+{
+ struct spi_message *msg = &applespi->wr_m;
+ struct spi_transfer *wt_t = &applespi->ww_t;
+ struct spi_transfer *dl_t = &applespi->wd_t;
+ struct spi_transfer *wr_t = &applespi->wr_t;
+ struct spi_transfer *st_t = &applespi->st_t;
+
+ memset(wt_t, 0, sizeof(*wt_t));
+ memset(dl_t, 0, sizeof(*dl_t));
+ memset(wr_t, 0, sizeof(*wr_t));
+ memset(st_t, 0, sizeof(*st_t));
+
+ /*
+ * All we need here is a delay at the beginning of the message before
+ * asserting cs. But the current spi API doesn't support this, so we
+ * end up with an extra unnecessary (but harmless) cs assertion and
+ * deassertion.
+ */
+ wt_t->delay_usecs = SPI_RW_CHG_DELAY_US;
+ wt_t->cs_change = 1;
+
+ dl_t->delay_usecs = applespi->spi_settings.spi_cs_delay;
+
+ wr_t->tx_buf = applespi->tx_buffer;
+ wr_t->len = APPLESPI_PACKET_SIZE;
+ wr_t->delay_usecs = SPI_RW_CHG_DELAY_US;
+
+ st_t->rx_buf = applespi->tx_status;
+ st_t->len = APPLESPI_STATUS_SIZE;
+
+ spi_message_init(msg);
+ spi_message_add_tail(wt_t, msg);
+ spi_message_add_tail(dl_t, msg);
+ spi_message_add_tail(wr_t, msg);
+ spi_message_add_tail(st_t, msg);
+}
+
+static int applespi_async(struct applespi_data *applespi,
+ struct spi_message *message, void (*complete)(void *))
+{
+ message->complete = complete;
+ message->context = applespi;
+
+ return spi_async(applespi->spi, message);
+}
+
+static inline bool applespi_check_write_status(struct applespi_data *applespi,
+ int sts)
+{
+ static u8 status_ok[] = { 0xac, 0x27, 0x68, 0xd5 };
+
+ if (sts < 0) {
+ dev_warn(&applespi->spi->dev, "Error writing to device: %d\n",
+ sts);
+ return false;
+ }
+
+ if (memcmp(applespi->tx_status, status_ok, APPLESPI_STATUS_SIZE)) {
+ dev_warn(&applespi->spi->dev, "Error writing to device: %*ph\n",
+ APPLESPI_STATUS_SIZE, applespi->tx_status);
+ return false;
+ }
+
+ return true;
+}
+
+static int applespi_get_spi_settings(struct applespi_data *applespi)
+{
+ struct acpi_device *adev = ACPI_COMPANION(&applespi->spi->dev);
+ const union acpi_object *o;
+ struct spi_settings *settings = &applespi->spi_settings;
+
+ if (!acpi_dev_get_property(adev, "spiCSDelay", ACPI_TYPE_BUFFER, &o))
+ settings->spi_cs_delay = *(u64 *)o->buffer.pointer;
+ else
+ dev_warn(&applespi->spi->dev,
+ "Property spiCSDelay not found\n");
+
+ if (!acpi_dev_get_property(adev, "resetA2RUsec", ACPI_TYPE_BUFFER, &o))
+ settings->reset_a2r_usec = *(u64 *)o->buffer.pointer;
+ else
+ dev_warn(&applespi->spi->dev,
+ "Property resetA2RUsec not found\n");
+
+ if (!acpi_dev_get_property(adev, "resetRecUsec", ACPI_TYPE_BUFFER, &o))
+ settings->reset_rec_usec = *(u64 *)o->buffer.pointer;
+ else
+ dev_warn(&applespi->spi->dev,
+ "Property resetRecUsec not found\n");
+
+ dev_dbg(&applespi->spi->dev,
+ "SPI settings: spi_cs_delay=%llu reset_a2r_usec=%llu reset_rec_usec=%llu\n",
+ settings->spi_cs_delay, settings->reset_a2r_usec,
+ settings->reset_rec_usec);
+
+ return 0;
+}
+
+static int applespi_setup_spi(struct applespi_data *applespi)
+{
+ int sts;
+
+ sts = applespi_get_spi_settings(applespi);
+ if (sts)
+ return sts;
+
+ spin_lock_init(&applespi->cmd_msg_lock);
+ init_waitqueue_head(&applespi->drain_complete);
+
+ return 0;
+}
+
+static int applespi_enable_spi(struct applespi_data *applespi)
+{
+ acpi_status acpi_sts;
+ unsigned long long spi_status;
+
+ /* check if SPI is already enabled, so we can skip the delay below */
+ acpi_sts = acpi_evaluate_integer(applespi->sist, NULL, NULL,
+ &spi_status);
+ if (ACPI_SUCCESS(acpi_sts) && spi_status)
+ return 0;
+
+ /* SIEN(1) will enable SPI communication */
+ acpi_sts = acpi_execute_simple_method(applespi->sien, NULL, 1);
+ if (ACPI_FAILURE(acpi_sts)) {
+ dev_err(&applespi->spi->dev, "SIEN failed: %s\n",
+ acpi_format_exception(acpi_sts));
+ return -ENODEV;
+ }
+
+ /*
+ * Allow the SPI interface to come up before returning. Without this
+ * delay, the SPI commands to enable multitouch mode may not reach
+ * the trackpad controller, causing pointer movement to break upon
+ * resume from sleep.
+ */
+ msleep(50);
+
+ return 0;
+}
+
+static int applespi_send_cmd_msg(struct applespi_data *applespi);
+
+static void applespi_msg_complete(struct applespi_data *applespi,
+ bool is_write_msg, bool is_read_compl)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&applespi->cmd_msg_lock, flags);
+
+ if (is_read_compl)
+ applespi->read_active = false;
+ if (is_write_msg)
+ applespi->write_active = false;
+
+ if (applespi->drain && !applespi->write_active)
+ wake_up_all(&applespi->drain_complete);
+
+ if (is_write_msg) {
+ applespi->cmd_msg_queued = false;
+ applespi_send_cmd_msg(applespi);
+ }
+
+ spin_unlock_irqrestore(&applespi->cmd_msg_lock, flags);
+}
+
+static void applespi_async_write_complete(void *context)
+{
+ struct applespi_data *applespi = context;
+ enum applespi_evt_type evt_type = applespi->cmd_evt_type;
+
+ applespi_get_trace_fun(evt_type)(evt_type, PT_WRITE,
+ applespi->tx_buffer,
+ APPLESPI_PACKET_SIZE);
+ applespi_get_trace_fun(evt_type)(evt_type, PT_STATUS,
+ applespi->tx_status,
+ APPLESPI_STATUS_SIZE);
+
+ if (!applespi_check_write_status(applespi, applespi->wr_m.status)) {
+ /*
+ * If we got an error, we presumably won't get the expected
+ * response message either.
+ */
+ applespi_msg_complete(applespi, true, false);
+ }
+}
+
+static int applespi_send_cmd_msg(struct applespi_data *applespi)
+{
+ u16 crc;
+ int sts;
+ struct spi_packet *packet = (struct spi_packet *)applespi->tx_buffer;
+ struct message *message = (struct message *)packet->data;
+ u16 msg_len;
+ u8 device;
+
+ /* check if draining */
+ if (applespi->drain)
+ return 0;
+
+ /* check whether send is in progress */
+ if (applespi->cmd_msg_queued)
+ return 0;
+
+ /* set up packet */
+ memset(packet, 0, APPLESPI_PACKET_SIZE);
+
+ /* are we processing init commands? */
+ if (applespi->want_tp_info_cmd) {
+ applespi->want_tp_info_cmd = false;
+ applespi->want_mt_init_cmd = true;
+ applespi->cmd_evt_type = ET_CMD_TP_INI;
+
+ /* build init command */
+ device = PACKET_DEV_INFO;
+
+ message->type = cpu_to_le16(0x1020);
+ msg_len = sizeof(message->tp_info_command);
+
+ message->zero = 0x02;
+ message->rsp_buf_len = cpu_to_le16(0x0200);
+
+ } else if (applespi->want_mt_init_cmd) {
+ applespi->want_mt_init_cmd = false;
+ applespi->cmd_evt_type = ET_CMD_TP_INI;
+
+ /* build init command */
+ device = PACKET_DEV_TPAD;
+
+ message->type = cpu_to_le16(0x0252);
+ msg_len = sizeof(message->init_mt_command);
+
+ message->init_mt_command.cmd = cpu_to_le16(0x0102);
+
+ /* do we need caps-lock command? */
+ } else if (applespi->want_cl_led_on != applespi->have_cl_led_on) {
+ applespi->have_cl_led_on = applespi->want_cl_led_on;
+ applespi->cmd_evt_type = ET_CMD_CL;
+
+ /* build led command */
+ device = PACKET_DEV_KEYB;
+
+ message->type = cpu_to_le16(0x0151);
+ msg_len = sizeof(message->capsl_command);
+
+ message->capsl_command.unknown = 0x01;
+ message->capsl_command.led = applespi->have_cl_led_on ? 2 : 0;
+
+ /* do we need backlight command? */
+ } else if (applespi->want_bl_level != applespi->have_bl_level) {
+ applespi->have_bl_level = applespi->want_bl_level;
+ applespi->cmd_evt_type = ET_CMD_BL;
+
+ /* build command buffer */
+ device = PACKET_DEV_KEYB;
+
+ message->type = cpu_to_le16(0xB051);
+ msg_len = sizeof(message->bl_command);
+
+ message->bl_command.const1 = cpu_to_le16(0x01B0);
+ message->bl_command.level =
+ cpu_to_le16(applespi->have_bl_level);
+
+ if (applespi->have_bl_level > 0)
+ message->bl_command.const2 = cpu_to_le16(0x01F4);
+ else
+ message->bl_command.const2 = cpu_to_le16(0x0001);
+
+ /* everything's up-to-date */
+ } else {
+ return 0;
+ }
+
+ /* finalize packet */
+ packet->flags = PACKET_TYPE_WRITE;
+ packet->device = device;
+ packet->length = cpu_to_le16(MSG_HEADER_SIZE + msg_len);
+
+ message->counter = applespi->cmd_msg_cntr++ % (U8_MAX + 1);
+
+ message->length = cpu_to_le16(msg_len - 2);
+ if (!message->rsp_buf_len)
+ message->rsp_buf_len = message->length;
+
+ crc = crc16(0, (u8 *)message, le16_to_cpu(packet->length) - 2);
+ put_unaligned_le16(crc, &message->data[msg_len - 2]);
+
+ crc = crc16(0, (u8 *)packet, sizeof(*packet) - 2);
+ packet->crc16 = cpu_to_le16(crc);
+
+ /* send command */
+ sts = applespi_async(applespi, &applespi->wr_m,
+ applespi_async_write_complete);
+ if (sts) {
+ dev_warn(&applespi->spi->dev,
+ "Error queueing async write to device: %d\n", sts);
+ return sts;
+ }
+
+ applespi->cmd_msg_queued = true;
+ applespi->write_active = true;
+
+ return 0;
+}
+
+static void applespi_init(struct applespi_data *applespi, bool is_resume)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&applespi->cmd_msg_lock, flags);
+
+ if (is_resume)
+ applespi->want_mt_init_cmd = true;
+ else
+ applespi->want_tp_info_cmd = true;
+ applespi_send_cmd_msg(applespi);
+
+ spin_unlock_irqrestore(&applespi->cmd_msg_lock, flags);
+}
+
+static int applespi_set_capsl_led(struct applespi_data *applespi,
+ bool capslock_on)
+{
+ unsigned long flags;
+ int sts;
+
+ spin_lock_irqsave(&applespi->cmd_msg_lock, flags);
+
+ applespi->want_cl_led_on = capslock_on;
+ sts = applespi_send_cmd_msg(applespi);
+
+ spin_unlock_irqrestore(&applespi->cmd_msg_lock, flags);
+
+ return sts;
+}
+
+static void applespi_set_bl_level(struct led_classdev *led_cdev,
+ enum led_brightness value)
+{
+ struct applespi_data *applespi =
+ container_of(led_cdev, struct applespi_data, backlight_info);
+ unsigned long flags;
+
+ spin_lock_irqsave(&applespi->cmd_msg_lock, flags);
+
+ if (value == 0) {
+ applespi->want_bl_level = value;
+ } else {
+ /*
+ * The backlight does not turn on till level 32, so we scale
+ * the range here so that from a user's perspective it turns
+ * on at 1.
+ */
+ applespi->want_bl_level =
+ ((value * KBD_BL_LEVEL_ADJ) / KBD_BL_LEVEL_SCALE +
+ KBD_BL_LEVEL_MIN);
+ }
+
+ applespi_send_cmd_msg(applespi);
+
+ spin_unlock_irqrestore(&applespi->cmd_msg_lock, flags);
+}
+
+static int applespi_event(struct input_dev *dev, unsigned int type,
+ unsigned int code, int value)
+{
+ struct applespi_data *applespi = input_get_drvdata(dev);
+
+ switch (type) {
+ case EV_LED:
+ applespi_set_capsl_led(applespi, !!test_bit(LED_CAPSL, dev->led));
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+/* lifted from the BCM5974 driver and renamed from raw2int */
+/* convert 16-bit little endian to signed integer */
+static inline int le16_to_int(__le16 x)
+{
+ return (signed short)le16_to_cpu(x);
+}
+
+static void applespi_debug_update_dimensions(struct applespi_data *applespi,
+ const struct tp_finger *f)
+{
+ applespi->tp_dim_min_x = min_t(int, applespi->tp_dim_min_x, f->abs_x);
+ applespi->tp_dim_max_x = max_t(int, applespi->tp_dim_max_x, f->abs_x);
+ applespi->tp_dim_min_y = min_t(int, applespi->tp_dim_min_y, f->abs_y);
+ applespi->tp_dim_max_y = max_t(int, applespi->tp_dim_max_y, f->abs_y);
+}
+
+static int applespi_tp_dim_open(struct inode *inode, struct file *file)
+{
+ struct applespi_data *applespi = inode->i_private;
+
+ file->private_data = applespi;
+
+ snprintf(applespi->tp_dim_val, sizeof(applespi->tp_dim_val),
+ "0x%.4x %dx%d+%u+%u\n",
+ applespi->touchpad_input_dev->id.product,
+ applespi->tp_dim_min_x, applespi->tp_dim_min_y,
+ applespi->tp_dim_max_x - applespi->tp_dim_min_x,
+ applespi->tp_dim_max_y - applespi->tp_dim_min_y);
+
+ return nonseekable_open(inode, file);
+}
+
+static ssize_t applespi_tp_dim_read(struct file *file, char __user *buf,
+ size_t len, loff_t *off)
+{
+ struct applespi_data *applespi = file->private_data;
+
+ return simple_read_from_buffer(buf, len, off, applespi->tp_dim_val,
+ strlen(applespi->tp_dim_val));
+}
+
+static const struct file_operations applespi_tp_dim_fops = {
+ .owner = THIS_MODULE,
+ .open = applespi_tp_dim_open,
+ .read = applespi_tp_dim_read,
+ .llseek = no_llseek,
+};
+
+static void report_finger_data(struct input_dev *input, int slot,
+ const struct input_mt_pos *pos,
+ const struct tp_finger *f)
+{
+ input_mt_slot(input, slot);
+ input_mt_report_slot_state(input, MT_TOOL_FINGER, true);
+
+ input_report_abs(input, ABS_MT_TOUCH_MAJOR,
+ le16_to_int(f->touch_major) << 1);
+ input_report_abs(input, ABS_MT_TOUCH_MINOR,
+ le16_to_int(f->touch_minor) << 1);
+ input_report_abs(input, ABS_MT_WIDTH_MAJOR,
+ le16_to_int(f->tool_major) << 1);
+ input_report_abs(input, ABS_MT_WIDTH_MINOR,
+ le16_to_int(f->tool_minor) << 1);
+ input_report_abs(input, ABS_MT_ORIENTATION,
+ MAX_FINGER_ORIENTATION - le16_to_int(f->orientation));
+ input_report_abs(input, ABS_MT_POSITION_X, pos->x);
+ input_report_abs(input, ABS_MT_POSITION_Y, pos->y);
+}
+
+static void report_tp_state(struct applespi_data *applespi,
+ struct touchpad_protocol *t)
+{
+ const struct tp_finger *f;
+ struct input_dev *input;
+ const struct applespi_tp_info *tp_info = &applespi->tp_info;
+ int i, n;
+
+ /* touchpad_input_dev is set async in worker */
+ input = smp_load_acquire(&applespi->touchpad_input_dev);
+ if (!input)
+ return; /* touchpad isn't initialized yet */
+
+ n = 0;
+
+ for (i = 0; i < t->number_of_fingers; i++) {
+ f = &t->fingers[i];
+ if (le16_to_int(f->touch_major) == 0)
+ continue;
+ applespi->pos[n].x = le16_to_int(f->abs_x);
+ applespi->pos[n].y = tp_info->y_min + tp_info->y_max -
+ le16_to_int(f->abs_y);
+ n++;
+
+ if (applespi->debug_tp_dim)
+ applespi_debug_update_dimensions(applespi, f);
+ }
+
+ input_mt_assign_slots(input, applespi->slots, applespi->pos, n, 0);
+
+ for (i = 0; i < n; i++)
+ report_finger_data(input, applespi->slots[i],
+ &applespi->pos[i], &t->fingers[i]);
+
+ input_mt_sync_frame(input);
+ input_report_key(input, BTN_LEFT, t->clicked);
+
+ input_sync(input);
+}
+
+static const struct applespi_key_translation *
+applespi_find_translation(const struct applespi_key_translation *table, u16 key)
+{
+ const struct applespi_key_translation *trans;
+
+ for (trans = table; trans->from; trans++)
+ if (trans->from == key)
+ return trans;
+
+ return NULL;
+}
+
+static unsigned int applespi_translate_fn_key(unsigned int key, int fn_pressed)
+{
+ const struct applespi_key_translation *trans;
+ int do_translate;
+
+ trans = applespi_find_translation(applespi_fn_codes, key);
+ if (trans) {
+ if (trans->flags & APPLE_FLAG_FKEY)
+ do_translate = (fnmode == 2 && fn_pressed) ||
+ (fnmode == 1 && !fn_pressed);
+ else
+ do_translate = fn_pressed;
+
+ if (do_translate)
+ key = trans->to;
+ }
+
+ return key;
+}
+
+static unsigned int applespi_translate_iso_layout(unsigned int key)
+{
+ const struct applespi_key_translation *trans;
+
+ trans = applespi_find_translation(apple_iso_keyboard, key);
+ if (trans)
+ key = trans->to;
+
+ return key;
+}
+
+static unsigned int applespi_code_to_key(u8 code, int fn_pressed)
+{
+ unsigned int key = applespi_scancodes[code];
+
+ if (fnmode)
+ key = applespi_translate_fn_key(key, fn_pressed);
+ if (iso_layout)
+ key = applespi_translate_iso_layout(key);
+ return key;
+}
+
+static void
+applespi_remap_fn_key(struct keyboard_protocol *keyboard_protocol)
+{
+ unsigned char tmp;
+ u8 bit = BIT((fnremap - 1) & 0x07);
+
+ if (!fnremap || fnremap > ARRAY_SIZE(applespi_controlcodes) ||
+ !applespi_controlcodes[fnremap - 1])
+ return;
+
+ tmp = keyboard_protocol->fn_pressed;
+ keyboard_protocol->fn_pressed = !!(keyboard_protocol->modifiers & bit);
+ if (tmp)
+ keyboard_protocol->modifiers |= bit;
+ else
+ keyboard_protocol->modifiers &= ~bit;
+}
+
+static void
+applespi_handle_keyboard_event(struct applespi_data *applespi,
+ struct keyboard_protocol *keyboard_protocol)
+{
+ unsigned int key;
+ int i;
+
+ compiletime_assert(ARRAY_SIZE(applespi_controlcodes) ==
+ sizeof_field(struct keyboard_protocol, modifiers) * 8,
+ "applespi_controlcodes has wrong number of entries");
+
+ /* check for rollover overflow, which is signalled by all keys == 1 */
+ if (!memchr_inv(keyboard_protocol->keys_pressed, 1, MAX_ROLLOVER))
+ return;
+
+ /* remap fn key if desired */
+ applespi_remap_fn_key(keyboard_protocol);
+
+ /* check released keys */
+ for (i = 0; i < MAX_ROLLOVER; i++) {
+ if (memchr(keyboard_protocol->keys_pressed,
+ applespi->last_keys_pressed[i], MAX_ROLLOVER))
+ continue; /* key is still pressed */
+
+ key = applespi_code_to_key(applespi->last_keys_pressed[i],
+ applespi->last_keys_fn_pressed[i]);
+ input_report_key(applespi->keyboard_input_dev, key, 0);
+ applespi->last_keys_fn_pressed[i] = 0;
+ }
+
+ /* check pressed keys */
+ for (i = 0; i < MAX_ROLLOVER; i++) {
+ if (keyboard_protocol->keys_pressed[i] <
+ ARRAY_SIZE(applespi_scancodes) &&
+ keyboard_protocol->keys_pressed[i] > 0) {
+ key = applespi_code_to_key(
+ keyboard_protocol->keys_pressed[i],
+ keyboard_protocol->fn_pressed);
+ input_report_key(applespi->keyboard_input_dev, key, 1);
+ applespi->last_keys_fn_pressed[i] =
+ keyboard_protocol->fn_pressed;
+ }
+ }
+
+ /* check control keys */
+ for (i = 0; i < ARRAY_SIZE(applespi_controlcodes); i++) {
+ if (keyboard_protocol->modifiers & BIT(i))
+ input_report_key(applespi->keyboard_input_dev,
+ applespi_controlcodes[i], 1);
+ else
+ input_report_key(applespi->keyboard_input_dev,
+ applespi_controlcodes[i], 0);
+ }
+
+ /* check function key */
+ if (keyboard_protocol->fn_pressed && !applespi->last_fn_pressed)
+ input_report_key(applespi->keyboard_input_dev, KEY_FN, 1);
+ else if (!keyboard_protocol->fn_pressed && applespi->last_fn_pressed)
+ input_report_key(applespi->keyboard_input_dev, KEY_FN, 0);
+ applespi->last_fn_pressed = keyboard_protocol->fn_pressed;
+
+ /* done */
+ input_sync(applespi->keyboard_input_dev);
+ memcpy(&applespi->last_keys_pressed, keyboard_protocol->keys_pressed,
+ sizeof(applespi->last_keys_pressed));
+}
+
+static const struct applespi_tp_info *applespi_find_touchpad_info(u8 model)
+{
+ const struct applespi_tp_model_info *info;
+
+ for (info = applespi_tp_models; info->model; info++) {
+ if (info->model == model)
+ return &info->tp_info;
+ }
+
+ return NULL;
+}
+
+static int
+applespi_register_touchpad_device(struct applespi_data *applespi,
+ struct touchpad_info_protocol *rcvd_tp_info)
+{
+ const struct applespi_tp_info *tp_info;
+ struct input_dev *touchpad_input_dev;
+ int sts;
+
+ /* set up touchpad dimensions */
+ tp_info = applespi_find_touchpad_info(rcvd_tp_info->model_no);
+ if (!tp_info) {
+ dev_warn(&applespi->spi->dev,
+ "Unknown touchpad model %x - falling back to MB8 touchpad\n",
+ rcvd_tp_info->model_no);
+ tp_info = &applespi_tp_models[0].tp_info;
+ }
+
+ applespi->tp_info = *tp_info;
+
+ if (touchpad_dimensions[0]) {
+ int x, y, w, h;
+
+ sts = sscanf(touchpad_dimensions, "%dx%d+%u+%u", &x, &y, &w, &h);
+ if (sts == 4) {
+ dev_info(&applespi->spi->dev,
+ "Overriding touchpad dimensions from module param\n");
+ applespi->tp_info.x_min = x;
+ applespi->tp_info.y_min = y;
+ applespi->tp_info.x_max = x + w;
+ applespi->tp_info.y_max = y + h;
+ } else {
+ dev_warn(&applespi->spi->dev,
+ "Invalid touchpad dimensions '%s': must be in the form XxY+W+H\n",
+ touchpad_dimensions);
+ touchpad_dimensions[0] = '\0';
+ }
+ }
+ if (!touchpad_dimensions[0]) {
+ snprintf(touchpad_dimensions, sizeof(touchpad_dimensions),
+ "%dx%d+%u+%u",
+ applespi->tp_info.x_min,
+ applespi->tp_info.y_min,
+ applespi->tp_info.x_max - applespi->tp_info.x_min,
+ applespi->tp_info.y_max - applespi->tp_info.y_min);
+ }
+
+ /* create touchpad input device */
+ touchpad_input_dev = devm_input_allocate_device(&applespi->spi->dev);
+ if (!touchpad_input_dev) {
+ dev_err(&applespi->spi->dev,
+ "Failed to allocate touchpad input device\n");
+ return -ENOMEM;
+ }
+
+ touchpad_input_dev->name = "Apple SPI Touchpad";
+ touchpad_input_dev->phys = "applespi/input1";
+ touchpad_input_dev->dev.parent = &applespi->spi->dev;
+ touchpad_input_dev->id.bustype = BUS_SPI;
+ touchpad_input_dev->id.vendor = SYNAPTICS_VENDOR_ID;
+ touchpad_input_dev->id.product =
+ rcvd_tp_info->model_no << 8 | rcvd_tp_info->model_flags;
+
+ /* basic properties */
+ input_set_capability(touchpad_input_dev, EV_REL, REL_X);
+ input_set_capability(touchpad_input_dev, EV_REL, REL_Y);
+
+ __set_bit(INPUT_PROP_POINTER, touchpad_input_dev->propbit);
+ __set_bit(INPUT_PROP_BUTTONPAD, touchpad_input_dev->propbit);
+
+ /* finger touch area */
+ input_set_abs_params(touchpad_input_dev, ABS_MT_TOUCH_MAJOR,
+ 0, 5000, 0, 0);
+ input_set_abs_params(touchpad_input_dev, ABS_MT_TOUCH_MINOR,
+ 0, 5000, 0, 0);
+
+ /* finger approach area */
+ input_set_abs_params(touchpad_input_dev, ABS_MT_WIDTH_MAJOR,
+ 0, 5000, 0, 0);
+ input_set_abs_params(touchpad_input_dev, ABS_MT_WIDTH_MINOR,
+ 0, 5000, 0, 0);
+
+ /* finger orientation */
+ input_set_abs_params(touchpad_input_dev, ABS_MT_ORIENTATION,
+ -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION,
+ 0, 0);
+
+ /* finger position */
+ input_set_abs_params(touchpad_input_dev, ABS_MT_POSITION_X,
+ applespi->tp_info.x_min, applespi->tp_info.x_max,
+ 0, 0);
+ input_set_abs_params(touchpad_input_dev, ABS_MT_POSITION_Y,
+ applespi->tp_info.y_min, applespi->tp_info.y_max,
+ 0, 0);
+
+ /* touchpad button */
+ input_set_capability(touchpad_input_dev, EV_KEY, BTN_LEFT);
+
+ /* multitouch */
+ sts = input_mt_init_slots(touchpad_input_dev, MAX_FINGERS,
+ INPUT_MT_POINTER | INPUT_MT_DROP_UNUSED |
+ INPUT_MT_TRACK);
+ if (sts) {
+ dev_err(&applespi->spi->dev,
+ "failed to initialize slots: %d", sts);
+ return sts;
+ }
+
+ /* register input device */
+ sts = input_register_device(touchpad_input_dev);
+ if (sts) {
+ dev_err(&applespi->spi->dev,
+ "Unable to register touchpad input device (%d)\n", sts);
+ return sts;
+ }
+
+ /* touchpad_input_dev is read async in spi callback */
+ smp_store_release(&applespi->touchpad_input_dev, touchpad_input_dev);
+
+ return 0;
+}
+
+static void applespi_worker(struct work_struct *work)
+{
+ struct applespi_data *applespi =
+ container_of(work, struct applespi_data, work);
+
+ applespi_register_touchpad_device(applespi, &applespi->rcvd_tp_info);
+}
+
+static void applespi_handle_cmd_response(struct applespi_data *applespi,
+ struct spi_packet *packet,
+ struct message *message)
+{
+ if (packet->device == PACKET_DEV_INFO &&
+ le16_to_cpu(message->type) == 0x1020) {
+ /*
+ * We're not allowed to sleep here, but registering an input
+ * device can sleep.
+ */
+ applespi->rcvd_tp_info = message->tp_info;
+ schedule_work(&applespi->work);
+ return;
+ }
+
+ if (le16_to_cpu(message->length) != 0x0000) {
+ dev_warn_ratelimited(&applespi->spi->dev,
+ "Received unexpected write response: length=%x\n",
+ le16_to_cpu(message->length));
+ return;
+ }
+
+ if (packet->device == PACKET_DEV_TPAD &&
+ le16_to_cpu(message->type) == 0x0252 &&
+ le16_to_cpu(message->rsp_buf_len) == 0x0002)
+ dev_info(&applespi->spi->dev, "modeswitch done.\n");
+}
+
+static bool applespi_verify_crc(struct applespi_data *applespi, u8 *buffer,
+ size_t buflen)
+{
+ u16 crc;
+
+ crc = crc16(0, buffer, buflen);
+ if (crc) {
+ dev_warn_ratelimited(&applespi->spi->dev,
+ "Received corrupted packet (crc mismatch)\n");
+ trace_applespi_bad_crc(ET_RD_CRC, READ, buffer, buflen);
+
+ return false;
+ }
+
+ return true;
+}
+
+static void applespi_debug_print_read_packet(struct applespi_data *applespi,
+ struct spi_packet *packet)
+{
+ unsigned int evt_type;
+
+ if (packet->flags == PACKET_TYPE_READ &&
+ packet->device == PACKET_DEV_KEYB)
+ evt_type = ET_RD_KEYB;
+ else if (packet->flags == PACKET_TYPE_READ &&
+ packet->device == PACKET_DEV_TPAD)
+ evt_type = ET_RD_TPAD;
+ else if (packet->flags == PACKET_TYPE_WRITE)
+ evt_type = applespi->cmd_evt_type;
+ else
+ evt_type = ET_RD_UNKN;
+
+ applespi_get_trace_fun(evt_type)(evt_type, PT_READ, applespi->rx_buffer,
+ APPLESPI_PACKET_SIZE);
+}
+
+static void applespi_got_data(struct applespi_data *applespi)
+{
+ struct spi_packet *packet;
+ struct message *message;
+ unsigned int msg_len;
+ unsigned int off;
+ unsigned int rem;
+ unsigned int len;
+
+ /* process packet header */
+ if (!applespi_verify_crc(applespi, applespi->rx_buffer,
+ APPLESPI_PACKET_SIZE)) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&applespi->cmd_msg_lock, flags);
+
+ if (applespi->drain) {
+ applespi->read_active = false;
+ applespi->write_active = false;
+
+ wake_up_all(&applespi->drain_complete);
+ }
+
+ spin_unlock_irqrestore(&applespi->cmd_msg_lock, flags);
+
+ return;
+ }
+
+ packet = (struct spi_packet *)applespi->rx_buffer;
+
+ applespi_debug_print_read_packet(applespi, packet);
+
+ off = le16_to_cpu(packet->offset);
+ rem = le16_to_cpu(packet->remaining);
+ len = le16_to_cpu(packet->length);
+
+ if (len > sizeof(packet->data)) {
+ dev_warn_ratelimited(&applespi->spi->dev,
+ "Received corrupted packet (invalid packet length %u)\n",
+ len);
+ goto msg_complete;
+ }
+
+ /* handle multi-packet messages */
+ if (rem > 0 || off > 0) {
+ if (off != applespi->saved_msg_len) {
+ dev_warn_ratelimited(&applespi->spi->dev,
+ "Received unexpected offset (got %u, expected %u)\n",
+ off, applespi->saved_msg_len);
+ goto msg_complete;
+ }
+
+ if (off + rem > MAX_PKTS_PER_MSG * APPLESPI_PACKET_SIZE) {
+ dev_warn_ratelimited(&applespi->spi->dev,
+ "Received message too large (size %u)\n",
+ off + rem);
+ goto msg_complete;
+ }
+
+ if (off + len > MAX_PKTS_PER_MSG * APPLESPI_PACKET_SIZE) {
+ dev_warn_ratelimited(&applespi->spi->dev,
+ "Received message too large (size %u)\n",
+ off + len);
+ goto msg_complete;
+ }
+
+ memcpy(applespi->msg_buf + off, &packet->data, len);
+ applespi->saved_msg_len += len;
+
+ if (rem > 0)
+ return;
+
+ message = (struct message *)applespi->msg_buf;
+ msg_len = applespi->saved_msg_len;
+ } else {
+ message = (struct message *)&packet->data;
+ msg_len = len;
+ }
+
+ /* got complete message - verify */
+ if (!applespi_verify_crc(applespi, (u8 *)message, msg_len))
+ goto msg_complete;
+
+ if (le16_to_cpu(message->length) != msg_len - MSG_HEADER_SIZE - 2) {
+ dev_warn_ratelimited(&applespi->spi->dev,
+ "Received corrupted packet (invalid message length %u - expected %u)\n",
+ le16_to_cpu(message->length),
+ msg_len - MSG_HEADER_SIZE - 2);
+ goto msg_complete;
+ }
+
+ /* handle message */
+ if (packet->flags == PACKET_TYPE_READ &&
+ packet->device == PACKET_DEV_KEYB) {
+ applespi_handle_keyboard_event(applespi, &message->keyboard);
+
+ } else if (packet->flags == PACKET_TYPE_READ &&
+ packet->device == PACKET_DEV_TPAD) {
+ struct touchpad_protocol *tp;
+ size_t tp_len;
+
+ tp = &message->touchpad;
+ tp_len = sizeof(*tp) +
+ tp->number_of_fingers * sizeof(tp->fingers[0]);
+
+ if (le16_to_cpu(message->length) + 2 != tp_len) {
+ dev_warn_ratelimited(&applespi->spi->dev,
+ "Received corrupted packet (invalid message length %u - num-fingers %u, tp-len %zu)\n",
+ le16_to_cpu(message->length),
+ tp->number_of_fingers, tp_len);
+ goto msg_complete;
+ }
+
+ if (tp->number_of_fingers > MAX_FINGERS) {
+ dev_warn_ratelimited(&applespi->spi->dev,
+ "Number of reported fingers (%u) exceeds max (%u))\n",
+ tp->number_of_fingers,
+ MAX_FINGERS);
+ tp->number_of_fingers = MAX_FINGERS;
+ }
+
+ report_tp_state(applespi, tp);
+
+ } else if (packet->flags == PACKET_TYPE_WRITE) {
+ applespi_handle_cmd_response(applespi, packet, message);
+ }
+
+msg_complete:
+ applespi->saved_msg_len = 0;
+
+ applespi_msg_complete(applespi, packet->flags == PACKET_TYPE_WRITE,
+ true);
+}
+
+static void applespi_async_read_complete(void *context)
+{
+ struct applespi_data *applespi = context;
+
+ if (applespi->rd_m.status < 0) {
+ dev_warn(&applespi->spi->dev, "Error reading from device: %d\n",
+ applespi->rd_m.status);
+ /*
+ * We don't actually know if this was a pure read, or a response
+ * to a write. But this is a rare error condition that should
+ * never occur, so clearing both flags to avoid deadlock.
+ */
+ applespi_msg_complete(applespi, true, true);
+ } else {
+ applespi_got_data(applespi);
+ }
+
+ acpi_finish_gpe(NULL, applespi->gpe);
+}
+
+static u32 applespi_notify(acpi_handle gpe_device, u32 gpe, void *context)
+{
+ struct applespi_data *applespi = context;
+ int sts;
+ unsigned long flags;
+
+ trace_applespi_irq_received(ET_RD_IRQ, PT_READ);
+
+ spin_lock_irqsave(&applespi->cmd_msg_lock, flags);
+
+ if (!applespi->suspended) {
+ sts = applespi_async(applespi, &applespi->rd_m,
+ applespi_async_read_complete);
+ if (sts)
+ dev_warn(&applespi->spi->dev,
+ "Error queueing async read to device: %d\n",
+ sts);
+ else
+ applespi->read_active = true;
+ }
+
+ spin_unlock_irqrestore(&applespi->cmd_msg_lock, flags);
+
+ return ACPI_INTERRUPT_HANDLED;
+}
+
+static int applespi_get_saved_bl_level(struct applespi_data *applespi)
+{
+ struct efivar_entry *efivar_entry;
+ u16 efi_data = 0;
+ unsigned long efi_data_len;
+ int sts;
+
+ efivar_entry = kmalloc(sizeof(*efivar_entry), GFP_KERNEL);
+ if (!efivar_entry)
+ return -ENOMEM;
+
+ memcpy(efivar_entry->var.VariableName, EFI_BL_LEVEL_NAME,
+ sizeof(EFI_BL_LEVEL_NAME));
+ efivar_entry->var.VendorGuid = EFI_BL_LEVEL_GUID;
+ efi_data_len = sizeof(efi_data);
+
+ sts = efivar_entry_get(efivar_entry, NULL, &efi_data_len, &efi_data);
+ if (sts && sts != -ENOENT)
+ dev_warn(&applespi->spi->dev,
+ "Error getting backlight level from EFI vars: %d\n",
+ sts);
+
+ kfree(efivar_entry);
+
+ return sts ? sts : efi_data;
+}
+
+static void applespi_save_bl_level(struct applespi_data *applespi,
+ unsigned int level)
+{
+ efi_guid_t efi_guid;
+ u32 efi_attr;
+ unsigned long efi_data_len;
+ u16 efi_data;
+ int sts;
+
+ /* Save keyboard backlight level */
+ efi_guid = EFI_BL_LEVEL_GUID;
+ efi_data = (u16)level;
+ efi_data_len = sizeof(efi_data);
+ efi_attr = EFI_VARIABLE_NON_VOLATILE | EFI_VARIABLE_BOOTSERVICE_ACCESS |
+ EFI_VARIABLE_RUNTIME_ACCESS;
+
+ sts = efivar_entry_set_safe(EFI_BL_LEVEL_NAME, efi_guid, efi_attr, true,
+ efi_data_len, &efi_data);
+ if (sts)
+ dev_warn(&applespi->spi->dev,
+ "Error saving backlight level to EFI vars: %d\n", sts);
+}
+
+static int applespi_probe(struct spi_device *spi)
+{
+ struct applespi_data *applespi;
+ acpi_handle spi_handle = ACPI_HANDLE(&spi->dev);
+ acpi_status acpi_sts;
+ int sts, i;
+ unsigned long long gpe, usb_status;
+
+ /* check if the USB interface is present and enabled already */
+ acpi_sts = acpi_evaluate_integer(spi_handle, "UIST", NULL, &usb_status);
+ if (ACPI_SUCCESS(acpi_sts) && usb_status) {
+ /* let the USB driver take over instead */
+ dev_info(&spi->dev, "USB interface already enabled\n");
+ return -ENODEV;
+ }
+
+ /* allocate driver data */
+ applespi = devm_kzalloc(&spi->dev, sizeof(*applespi), GFP_KERNEL);
+ if (!applespi)
+ return -ENOMEM;
+
+ applespi->spi = spi;
+
+ INIT_WORK(&applespi->work, applespi_worker);
+
+ /* store the driver data */
+ spi_set_drvdata(spi, applespi);
+
+ /* create our buffers */
+ applespi->tx_buffer = devm_kmalloc(&spi->dev, APPLESPI_PACKET_SIZE,
+ GFP_KERNEL);
+ applespi->tx_status = devm_kmalloc(&spi->dev, APPLESPI_STATUS_SIZE,
+ GFP_KERNEL);
+ applespi->rx_buffer = devm_kmalloc(&spi->dev, APPLESPI_PACKET_SIZE,
+ GFP_KERNEL);
+ applespi->msg_buf = devm_kmalloc_array(&spi->dev, MAX_PKTS_PER_MSG,
+ APPLESPI_PACKET_SIZE,
+ GFP_KERNEL);
+
+ if (!applespi->tx_buffer || !applespi->tx_status ||
+ !applespi->rx_buffer || !applespi->msg_buf)
+ return -ENOMEM;
+
+ /* set up our spi messages */
+ applespi_setup_read_txfrs(applespi);
+ applespi_setup_write_txfrs(applespi);
+
+ /* cache ACPI method handles */
+ acpi_sts = acpi_get_handle(spi_handle, "SIEN", &applespi->sien);
+ if (ACPI_FAILURE(acpi_sts)) {
+ dev_err(&applespi->spi->dev,
+ "Failed to get SIEN ACPI method handle: %s\n",
+ acpi_format_exception(acpi_sts));
+ return -ENODEV;
+ }
+
+ acpi_sts = acpi_get_handle(spi_handle, "SIST", &applespi->sist);
+ if (ACPI_FAILURE(acpi_sts)) {
+ dev_err(&applespi->spi->dev,
+ "Failed to get SIST ACPI method handle: %s\n",
+ acpi_format_exception(acpi_sts));
+ return -ENODEV;
+ }
+
+ /* switch on the SPI interface */
+ sts = applespi_setup_spi(applespi);
+ if (sts)
+ return sts;
+
+ sts = applespi_enable_spi(applespi);
+ if (sts)
+ return sts;
+
+ /* setup the keyboard input dev */
+ applespi->keyboard_input_dev = devm_input_allocate_device(&spi->dev);
+
+ if (!applespi->keyboard_input_dev)
+ return -ENOMEM;
+
+ applespi->keyboard_input_dev->name = "Apple SPI Keyboard";
+ applespi->keyboard_input_dev->phys = "applespi/input0";
+ applespi->keyboard_input_dev->dev.parent = &spi->dev;
+ applespi->keyboard_input_dev->id.bustype = BUS_SPI;
+
+ applespi->keyboard_input_dev->evbit[0] =
+ BIT_MASK(EV_KEY) | BIT_MASK(EV_LED) | BIT_MASK(EV_REP);
+ applespi->keyboard_input_dev->ledbit[0] = BIT_MASK(LED_CAPSL);
+
+ input_set_drvdata(applespi->keyboard_input_dev, applespi);
+ applespi->keyboard_input_dev->event = applespi_event;
+
+ for (i = 0; i < ARRAY_SIZE(applespi_scancodes); i++)
+ if (applespi_scancodes[i])
+ input_set_capability(applespi->keyboard_input_dev,
+ EV_KEY, applespi_scancodes[i]);
+
+ for (i = 0; i < ARRAY_SIZE(applespi_controlcodes); i++)
+ if (applespi_controlcodes[i])
+ input_set_capability(applespi->keyboard_input_dev,
+ EV_KEY, applespi_controlcodes[i]);
+
+ for (i = 0; i < ARRAY_SIZE(applespi_fn_codes); i++)
+ if (applespi_fn_codes[i].to)
+ input_set_capability(applespi->keyboard_input_dev,
+ EV_KEY, applespi_fn_codes[i].to);
+
+ input_set_capability(applespi->keyboard_input_dev, EV_KEY, KEY_FN);
+
+ sts = input_register_device(applespi->keyboard_input_dev);
+ if (sts) {
+ dev_err(&applespi->spi->dev,
+ "Unable to register keyboard input device (%d)\n", sts);
+ return -ENODEV;
+ }
+
+ /*
+ * The applespi device doesn't send interrupts normally (as is described
+ * in its DSDT), but rather seems to use ACPI GPEs.
+ */
+ acpi_sts = acpi_evaluate_integer(spi_handle, "_GPE", NULL, &gpe);
+ if (ACPI_FAILURE(acpi_sts)) {
+ dev_err(&applespi->spi->dev,
+ "Failed to obtain GPE for SPI slave device: %s\n",
+ acpi_format_exception(acpi_sts));
+ return -ENODEV;
+ }
+ applespi->gpe = (int)gpe;
+
+ acpi_sts = acpi_install_gpe_handler(NULL, applespi->gpe,
+ ACPI_GPE_LEVEL_TRIGGERED,
+ applespi_notify, applespi);
+ if (ACPI_FAILURE(acpi_sts)) {
+ dev_err(&applespi->spi->dev,
+ "Failed to install GPE handler for GPE %d: %s\n",
+ applespi->gpe, acpi_format_exception(acpi_sts));
+ return -ENODEV;
+ }
+
+ applespi->suspended = false;
+
+ acpi_sts = acpi_enable_gpe(NULL, applespi->gpe);
+ if (ACPI_FAILURE(acpi_sts)) {
+ dev_err(&applespi->spi->dev,
+ "Failed to enable GPE handler for GPE %d: %s\n",
+ applespi->gpe, acpi_format_exception(acpi_sts));
+ acpi_remove_gpe_handler(NULL, applespi->gpe, applespi_notify);
+ return -ENODEV;
+ }
+
+ /* trigger touchpad setup */
+ applespi_init(applespi, false);
+
+ /*
+ * By default this device is not enabled for wakeup; but USB keyboards
+ * generally are, so the expectation is that by default the keyboard
+ * will wake the system.
+ */
+ device_wakeup_enable(&spi->dev);
+
+ /* set up keyboard-backlight */
+ sts = applespi_get_saved_bl_level(applespi);
+ if (sts >= 0)
+ applespi_set_bl_level(&applespi->backlight_info, sts);
+
+ applespi->backlight_info.name = "spi::kbd_backlight";
+ applespi->backlight_info.default_trigger = "kbd-backlight";
+ applespi->backlight_info.brightness_set = applespi_set_bl_level;
+
+ sts = devm_led_classdev_register(&spi->dev, &applespi->backlight_info);
+ if (sts)
+ dev_warn(&applespi->spi->dev,
+ "Unable to register keyboard backlight class dev (%d)\n",
+ sts);
+
+ /* set up debugfs entries for touchpad dimensions logging */
+ applespi->debugfs_root = debugfs_create_dir("applespi", NULL);
+ if (IS_ERR(applespi->debugfs_root)) {
+ if (PTR_ERR(applespi->debugfs_root) != -ENODEV)
+ dev_warn(&applespi->spi->dev,
+ "Error creating debugfs root entry (%ld)\n",
+ PTR_ERR(applespi->debugfs_root));
+ } else {
+ struct dentry *ret;
+
+ ret = debugfs_create_bool("enable_tp_dim", 0600,
+ applespi->debugfs_root,
+ &applespi->debug_tp_dim);
+ if (IS_ERR(ret))
+ dev_dbg(&applespi->spi->dev,
+ "Error creating debugfs entry enable_tp_dim (%ld)\n",
+ PTR_ERR(ret));
+
+ ret = debugfs_create_file("tp_dim", 0400,
+ applespi->debugfs_root, applespi,
+ &applespi_tp_dim_fops);
+ if (IS_ERR(ret))
+ dev_dbg(&applespi->spi->dev,
+ "Error creating debugfs entry tp_dim (%ld)\n",
+ PTR_ERR(ret));
+ }
+
+ return 0;
+}
+
+static void applespi_drain_writes(struct applespi_data *applespi)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&applespi->cmd_msg_lock, flags);
+
+ applespi->drain = true;
+ wait_event_lock_irq(applespi->drain_complete, !applespi->write_active,
+ applespi->cmd_msg_lock);
+
+ spin_unlock_irqrestore(&applespi->cmd_msg_lock, flags);
+}
+
+static void applespi_drain_reads(struct applespi_data *applespi)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&applespi->cmd_msg_lock, flags);
+
+ wait_event_lock_irq(applespi->drain_complete, !applespi->read_active,
+ applespi->cmd_msg_lock);
+
+ applespi->suspended = true;
+
+ spin_unlock_irqrestore(&applespi->cmd_msg_lock, flags);
+}
+
+static int applespi_remove(struct spi_device *spi)
+{
+ struct applespi_data *applespi = spi_get_drvdata(spi);
+
+ applespi_drain_writes(applespi);
+
+ acpi_disable_gpe(NULL, applespi->gpe);
+ acpi_remove_gpe_handler(NULL, applespi->gpe, applespi_notify);
+ device_wakeup_disable(&spi->dev);
+
+ applespi_drain_reads(applespi);
+
+ debugfs_remove_recursive(applespi->debugfs_root);
+
+ return 0;
+}
+
+static void applespi_shutdown(struct spi_device *spi)
+{
+ struct applespi_data *applespi = spi_get_drvdata(spi);
+
+ applespi_save_bl_level(applespi, applespi->have_bl_level);
+}
+
+static int applespi_poweroff_late(struct device *dev)
+{
+ struct spi_device *spi = to_spi_device(dev);
+ struct applespi_data *applespi = spi_get_drvdata(spi);
+
+ applespi_save_bl_level(applespi, applespi->have_bl_level);
+
+ return 0;
+}
+
+static int __maybe_unused applespi_suspend(struct device *dev)
+{
+ struct spi_device *spi = to_spi_device(dev);
+ struct applespi_data *applespi = spi_get_drvdata(spi);
+ acpi_status acpi_sts;
+ int sts;
+
+ /* turn off caps-lock - it'll stay on otherwise */
+ sts = applespi_set_capsl_led(applespi, false);
+ if (sts)
+ dev_warn(&applespi->spi->dev,
+ "Failed to turn off caps-lock led (%d)\n", sts);
+
+ applespi_drain_writes(applespi);
+
+ /* disable the interrupt */
+ acpi_sts = acpi_disable_gpe(NULL, applespi->gpe);
+ if (ACPI_FAILURE(acpi_sts))
+ dev_err(&applespi->spi->dev,
+ "Failed to disable GPE handler for GPE %d: %s\n",
+ applespi->gpe, acpi_format_exception(acpi_sts));
+
+ applespi_drain_reads(applespi);
+
+ return 0;
+}
+
+static int __maybe_unused applespi_resume(struct device *dev)
+{
+ struct spi_device *spi = to_spi_device(dev);
+ struct applespi_data *applespi = spi_get_drvdata(spi);
+ acpi_status acpi_sts;
+ unsigned long flags;
+
+ /* ensure our flags and state reflect a newly resumed device */
+ spin_lock_irqsave(&applespi->cmd_msg_lock, flags);
+
+ applespi->drain = false;
+ applespi->have_cl_led_on = false;
+ applespi->have_bl_level = 0;
+ applespi->cmd_msg_queued = false;
+ applespi->read_active = false;
+ applespi->write_active = false;
+
+ applespi->suspended = false;
+
+ spin_unlock_irqrestore(&applespi->cmd_msg_lock, flags);
+
+ /* switch on the SPI interface */
+ applespi_enable_spi(applespi);
+
+ /* re-enable the interrupt */
+ acpi_sts = acpi_enable_gpe(NULL, applespi->gpe);
+ if (ACPI_FAILURE(acpi_sts))
+ dev_err(&applespi->spi->dev,
+ "Failed to re-enable GPE handler for GPE %d: %s\n",
+ applespi->gpe, acpi_format_exception(acpi_sts));
+
+ /* switch the touchpad into multitouch mode */
+ applespi_init(applespi, true);
+
+ return 0;
+}
+
+static const struct acpi_device_id applespi_acpi_match[] = {
+ { "APP000D", 0 },
+ { }
+};
+MODULE_DEVICE_TABLE(acpi, applespi_acpi_match);
+
+const struct dev_pm_ops applespi_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(applespi_suspend, applespi_resume)
+ .poweroff_late = applespi_poweroff_late,
+};
+
+static struct spi_driver applespi_driver = {
+ .driver = {
+ .name = "applespi",
+ .acpi_match_table = applespi_acpi_match,
+ .pm = &applespi_pm_ops,
+ },
+ .probe = applespi_probe,
+ .remove = applespi_remove,
+ .shutdown = applespi_shutdown,
+};
+
+module_spi_driver(applespi_driver)
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("MacBook(Pro) SPI Keyboard/Touchpad driver");
+MODULE_AUTHOR("Federico Lorenzi");
+MODULE_AUTHOR("Ronald Tschalär");
diff --git a/drivers/input/keyboard/applespi.h b/drivers/input/keyboard/applespi.h
new file mode 100644
index 000000000000..7f5ab10c597a
--- /dev/null
+++ b/drivers/input/keyboard/applespi.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * MacBook (Pro) SPI keyboard and touchpad driver
+ *
+ * Copyright (c) 2015-2019 Federico Lorenzi
+ * Copyright (c) 2017-2019 Ronald Tschalär
+ */
+
+#ifndef _APPLESPI_H_
+#define _APPLESPI_H_
+
+enum applespi_evt_type {
+ ET_CMD_TP_INI = BIT(0),
+ ET_CMD_BL = BIT(1),
+ ET_CMD_CL = BIT(2),
+ ET_RD_KEYB = BIT(8),
+ ET_RD_TPAD = BIT(9),
+ ET_RD_UNKN = BIT(10),
+ ET_RD_IRQ = BIT(11),
+ ET_RD_CRC = BIT(12),
+};
+
+enum applespi_pkt_type {
+ PT_READ,
+ PT_WRITE,
+ PT_STATUS,
+};
+
+#endif /* _APPLESPI_H_ */
diff --git a/drivers/input/keyboard/applespi_trace.h b/drivers/input/keyboard/applespi_trace.h
new file mode 100644
index 000000000000..0ad1a3d79f50
--- /dev/null
+++ b/drivers/input/keyboard/applespi_trace.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * MacBook (Pro) SPI keyboard and touchpad driver
+ *
+ * Copyright (c) 2015-2019 Federico Lorenzi
+ * Copyright (c) 2017-2019 Ronald Tschalär
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM applespi
+
+#if !defined(_APPLESPI_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _APPLESPI_TRACE_H_
+
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+#include "applespi.h"
+
+DECLARE_EVENT_CLASS(dump_message_template,
+ TP_PROTO(enum applespi_evt_type evt_type,
+ enum applespi_pkt_type pkt_type,
+ u8 *buf,
+ size_t len),
+
+ TP_ARGS(evt_type, pkt_type, buf, len),
+
+ TP_STRUCT__entry(
+ __field(enum applespi_evt_type, evt_type)
+ __field(enum applespi_pkt_type, pkt_type)
+ __field(size_t, len)
+ __dynamic_array(u8, buf, len)
+ ),
+
+ TP_fast_assign(
+ __entry->evt_type = evt_type;
+ __entry->pkt_type = pkt_type;
+ __entry->len = len;
+ memcpy(__get_dynamic_array(buf), buf, len);
+ ),
+
+ TP_printk("%-6s: %s",
+ __print_symbolic(__entry->pkt_type,
+ { PT_READ, "read" },
+ { PT_WRITE, "write" },
+ { PT_STATUS, "status" }
+ ),
+ __print_hex(__get_dynamic_array(buf), __entry->len))
+);
+
+#define DEFINE_DUMP_MESSAGE_EVENT(name) \
+DEFINE_EVENT(dump_message_template, name, \
+ TP_PROTO(enum applespi_evt_type evt_type, \
+ enum applespi_pkt_type pkt_type, \
+ u8 *buf, \
+ size_t len), \
+ TP_ARGS(evt_type, pkt_type, buf, len) \
+)
+
+DEFINE_DUMP_MESSAGE_EVENT(applespi_tp_ini_cmd);
+DEFINE_DUMP_MESSAGE_EVENT(applespi_backlight_cmd);
+DEFINE_DUMP_MESSAGE_EVENT(applespi_caps_lock_cmd);
+DEFINE_DUMP_MESSAGE_EVENT(applespi_keyboard_data);
+DEFINE_DUMP_MESSAGE_EVENT(applespi_touchpad_data);
+DEFINE_DUMP_MESSAGE_EVENT(applespi_unknown_data);
+DEFINE_DUMP_MESSAGE_EVENT(applespi_bad_crc);
+
+TRACE_EVENT(applespi_irq_received,
+ TP_PROTO(enum applespi_evt_type evt_type,
+ enum applespi_pkt_type pkt_type),
+
+ TP_ARGS(evt_type, pkt_type),
+
+ TP_STRUCT__entry(
+ __field(enum applespi_evt_type, evt_type)
+ __field(enum applespi_pkt_type, pkt_type)
+ ),
+
+ TP_fast_assign(
+ __entry->evt_type = evt_type;
+ __entry->pkt_type = pkt_type;
+ ),
+
+ "\n"
+);
+
+#endif /* _APPLESPI_TRACE_H_ */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../drivers/input/keyboard
+#define TRACE_INCLUDE_FILE applespi_trace
+#include <trace/define_trace.h>
diff --git a/drivers/input/keyboard/mtk-pmic-keys.c b/drivers/input/keyboard/mtk-pmic-keys.c
index 746ff06eaf8d..62391d6c7da6 100644
--- a/drivers/input/keyboard/mtk-pmic-keys.c
+++ b/drivers/input/keyboard/mtk-pmic-keys.c
@@ -277,8 +277,10 @@ static int mtk_pmic_keys_probe(struct platform_device *pdev)
keys->keys[index].regs = &mtk_pmic_regs->keys_regs[index];
keys->keys[index].irq = platform_get_irq(pdev, index);
- if (keys->keys[index].irq < 0)
+ if (keys->keys[index].irq < 0) {
+ of_node_put(child);
return keys->keys[index].irq;
+ }
error = of_property_read_u32(child,
"linux,keycodes", &keys->keys[index].keycode);
@@ -286,6 +288,7 @@ static int mtk_pmic_keys_probe(struct platform_device *pdev)
dev_err(keys->dev,
"failed to read key:%d linux,keycode property: %d\n",
index, error);
+ of_node_put(child);
return error;
}
@@ -293,8 +296,10 @@ static int mtk_pmic_keys_probe(struct platform_device *pdev)
keys->keys[index].wakeup = true;
error = mtk_pmic_key_setup(keys, &keys->keys[index]);
- if (error)
+ if (error) {
+ of_node_put(child);
return error;
+ }
index++;
}
diff --git a/drivers/input/keyboard/sun4i-lradc-keys.c b/drivers/input/keyboard/sun4i-lradc-keys.c
index 6ffdc26b9c89..4a796bed48ac 100644
--- a/drivers/input/keyboard/sun4i-lradc-keys.c
+++ b/drivers/input/keyboard/sun4i-lradc-keys.c
@@ -198,18 +198,21 @@ static int sun4i_lradc_load_dt_keymap(struct device *dev,
error = of_property_read_u32(pp, "channel", &channel);
if (error || channel != 0) {
dev_err(dev, "%pOFn: Inval channel prop\n", pp);
+ of_node_put(pp);
return -EINVAL;
}
error = of_property_read_u32(pp, "voltage", &map->voltage);
if (error) {
dev_err(dev, "%pOFn: Inval voltage prop\n", pp);
+ of_node_put(pp);
return -EINVAL;
}
error = of_property_read_u32(pp, "linux,code", &map->keycode);
if (error) {
dev_err(dev, "%pOFn: Inval linux,code prop\n", pp);
+ of_node_put(pp);
return -EINVAL;
}
diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c
index 8996323ce8d9..34700eda0429 100644
--- a/drivers/input/mouse/alps.c
+++ b/drivers/input/mouse/alps.c
@@ -21,6 +21,7 @@
#include "psmouse.h"
#include "alps.h"
+#include "trackpoint.h"
/*
* Definitions for ALPS version 3 and 4 command mode protocol
@@ -2861,6 +2862,23 @@ static const struct alps_protocol_info *alps_match_table(unsigned char *e7,
return NULL;
}
+static bool alps_is_cs19_trackpoint(struct psmouse *psmouse)
+{
+ u8 param[2] = { 0 };
+
+ if (ps2_command(&psmouse->ps2dev,
+ param, MAKE_PS2_CMD(0, 2, TP_READ_ID)))
+ return false;
+
+ /*
+ * param[0] contains the trackpoint device variant_id while
+ * param[1] contains the firmware_id. So far all alps
+ * trackpoint-only devices have their variant_ids equal
+ * TP_VARIANT_ALPS and their firmware_ids are in 0x20~0x2f range.
+ */
+ return param[0] == TP_VARIANT_ALPS && ((param[1] & 0xf0) == 0x20);
+}
+
static int alps_identify(struct psmouse *psmouse, struct alps_data *priv)
{
const struct alps_protocol_info *protocol;
@@ -3162,6 +3180,20 @@ int alps_detect(struct psmouse *psmouse, bool set_properties)
return error;
/*
+ * ALPS cs19 is a trackpoint-only device, and uses different
+ * protocol than DualPoint ones, so we return -EINVAL here and let
+ * trackpoint.c drive this device. If the trackpoint driver is not
+ * enabled, the device will fall back to a bare PS/2 mouse.
+ * If ps2_command() fails here, we depend on the immediately
+ * followed psmouse_reset() to reset the device to normal state.
+ */
+ if (alps_is_cs19_trackpoint(psmouse)) {
+ psmouse_dbg(psmouse,
+ "ALPS CS19 trackpoint-only device detected, ignoring\n");
+ return -EINVAL;
+ }
+
+ /*
* Reset the device to make sure it is fully operational:
* on some laptops, like certain Dell Latitudes, we may
* fail to properly detect presence of trackstick if device
diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c
index 1080c0c49815..b1956ed4c0dd 100644
--- a/drivers/input/mouse/synaptics.c
+++ b/drivers/input/mouse/synaptics.c
@@ -176,6 +176,7 @@ static const char * const smbus_pnp_ids[] = {
"LEN0093", /* T480 */
"LEN0096", /* X280 */
"LEN0097", /* X280 -> ALPS trackpoint */
+ "LEN009b", /* T580 */
"LEN200f", /* T450s */
"LEN2054", /* E480 */
"LEN2055", /* E580 */
@@ -705,7 +706,7 @@ static void synaptics_pt_create(struct psmouse *psmouse)
serio->id.type = SERIO_PS_PSTHRU;
strlcpy(serio->name, "Synaptics pass-through", sizeof(serio->name));
- strlcpy(serio->phys, "synaptics-pt/serio0", sizeof(serio->name));
+ strlcpy(serio->phys, "synaptics-pt/serio0", sizeof(serio->phys));
serio->write = synaptics_pt_write;
serio->start = synaptics_pt_start;
serio->stop = synaptics_pt_stop;
diff --git a/drivers/input/mouse/trackpoint.h b/drivers/input/mouse/trackpoint.h
index 0afffe8d824f..77110f3ec21d 100644
--- a/drivers/input/mouse/trackpoint.h
+++ b/drivers/input/mouse/trackpoint.h
@@ -158,7 +158,8 @@ struct trackpoint_data {
#ifdef CONFIG_MOUSE_PS2_TRACKPOINT
int trackpoint_detect(struct psmouse *psmouse, bool set_properties);
#else
-inline int trackpoint_detect(struct psmouse *psmouse, bool set_properties)
+static inline int trackpoint_detect(struct psmouse *psmouse,
+ bool set_properties)
{
return -ENOSYS;
}
diff --git a/drivers/input/serio/hyperv-keyboard.c b/drivers/input/serio/hyperv-keyboard.c
index 8e457e50f837..88ae7c2ac3c8 100644
--- a/drivers/input/serio/hyperv-keyboard.c
+++ b/drivers/input/serio/hyperv-keyboard.c
@@ -75,8 +75,8 @@ struct synth_kbd_keystroke {
#define HK_MAXIMUM_MESSAGE_SIZE 256
-#define KBD_VSC_SEND_RING_BUFFER_SIZE (10 * PAGE_SIZE)
-#define KBD_VSC_RECV_RING_BUFFER_SIZE (10 * PAGE_SIZE)
+#define KBD_VSC_SEND_RING_BUFFER_SIZE (40 * 1024)
+#define KBD_VSC_RECV_RING_BUFFER_SIZE (40 * 1024)
#define XTKBD_EMUL0 0xe0
#define XTKBD_EMUL1 0xe1
diff --git a/drivers/input/tablet/gtco.c b/drivers/input/tablet/gtco.c
index 4b8b9d7aa75e..35031228a6d0 100644
--- a/drivers/input/tablet/gtco.c
+++ b/drivers/input/tablet/gtco.c
@@ -78,6 +78,7 @@ Scott Hill shill@gtcocalcomp.com
/* Max size of a single report */
#define REPORT_MAX_SIZE 10
+#define MAX_COLLECTION_LEVELS 10
/* Bitmask whether pen is in range */
@@ -223,8 +224,7 @@ static void parse_hid_report_descriptor(struct gtco *device, char * report,
char maintype = 'x';
char globtype[12];
int indent = 0;
- char indentstr[10] = "";
-
+ char indentstr[MAX_COLLECTION_LEVELS + 1] = { 0 };
dev_dbg(ddev, "======>>>>>>PARSE<<<<<<======\n");
@@ -350,6 +350,13 @@ static void parse_hid_report_descriptor(struct gtco *device, char * report,
case TAG_MAIN_COL_START:
maintype = 'S';
+ if (indent == MAX_COLLECTION_LEVELS) {
+ dev_err(ddev, "Collection level %d would exceed limit of %d\n",
+ indent + 1,
+ MAX_COLLECTION_LEVELS);
+ break;
+ }
+
if (data == 0) {
dev_dbg(ddev, "======>>>>>> Physical\n");
strcpy(globtype, "Physical");
@@ -369,8 +376,15 @@ static void parse_hid_report_descriptor(struct gtco *device, char * report,
break;
case TAG_MAIN_COL_END:
- dev_dbg(ddev, "<<<<<<======\n");
maintype = 'E';
+
+ if (indent == 0) {
+ dev_err(ddev, "Collection level already at zero\n");
+ break;
+ }
+
+ dev_dbg(ddev, "<<<<<<======\n");
+
indent--;
for (x = 0; x < indent; x++)
indentstr[x] = '-';
diff --git a/drivers/input/touchscreen/auo-pixcir-ts.c b/drivers/input/touchscreen/auo-pixcir-ts.c
index 8e48fbda487a..8e9f3b7b8180 100644
--- a/drivers/input/touchscreen/auo-pixcir-ts.c
+++ b/drivers/input/touchscreen/auo-pixcir-ts.c
@@ -602,9 +602,8 @@ static int auo_pixcir_probe(struct i2c_client *client,
return error;
}
- error = devm_add_action(&client->dev, auo_pixcir_reset, ts);
+ error = devm_add_action_or_reset(&client->dev, auo_pixcir_reset, ts);
if (error) {
- auo_pixcir_reset(ts);
dev_err(&client->dev, "failed to register reset action, %d\n",
error);
return error;
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 73740b969e62..b607a92791d3 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2533,7 +2533,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
npages = sg_num_pages(dev, sglist, nelems);
address = dma_ops_alloc_iova(dev, dma_dom, npages, dma_mask);
- if (address == DMA_MAPPING_ERROR)
+ if (!address)
goto out_err;
prot = dir2prot(direction);
diff --git a/drivers/memory/.gitignore b/drivers/memory/.gitignore
new file mode 100644
index 000000000000..cbca8b028437
--- /dev/null
+++ b/drivers/memory/.gitignore
@@ -0,0 +1 @@
+ti-emif-asm-offsets.h
diff --git a/drivers/memory/Makefile b/drivers/memory/Makefile
index 9d5c409a1591..27b493435e61 100644
--- a/drivers/memory/Makefile
+++ b/drivers/memory/Makefile
@@ -29,9 +29,10 @@ ti-emif-sram-objs := ti-emif-pm.o ti-emif-sram-pm.o
AFLAGS_ti-emif-sram-pm.o :=-Wa,-march=armv7-a
-drivers/memory/ti-emif-sram-pm.o: include/generated/ti-emif-asm-offsets.h
+$(obj)/ti-emif-sram-pm.o: $(obj)/ti-emif-asm-offsets.h
-include/generated/ti-emif-asm-offsets.h: drivers/memory/emif-asm-offsets.s FORCE
+$(obj)/ti-emif-asm-offsets.h: $(obj)/emif-asm-offsets.s FORCE
$(call filechk,offsets,__TI_EMIF_ASM_OFFSETS_H__)
targets += emif-asm-offsets.s
+clean-files += ti-emif-asm-offsets.h
diff --git a/drivers/memory/ti-emif-sram-pm.S b/drivers/memory/ti-emif-sram-pm.S
index d75ae18efa7d..d1c83bd5b98e 100644
--- a/drivers/memory/ti-emif-sram-pm.S
+++ b/drivers/memory/ti-emif-sram-pm.S
@@ -14,12 +14,12 @@
* GNU General Public License for more details.
*/
-#include <generated/ti-emif-asm-offsets.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/memory.h>
#include "emif.h"
+#include "ti-emif-asm-offsets.h"
#define EMIF_POWER_MGMT_WAIT_SELF_REFRESH_8192_CYCLES 0x00a0
#define EMIF_POWER_MGMT_SR_TIMER_MASK 0x00f0
diff --git a/drivers/ntb/Kconfig b/drivers/ntb/Kconfig
index c99eed87382a..df16c755b4da 100644
--- a/drivers/ntb/Kconfig
+++ b/drivers/ntb/Kconfig
@@ -13,6 +13,17 @@ menuconfig NTB
if NTB
+config NTB_MSI
+ bool "MSI Interrupt Support"
+ depends on PCI_MSI
+ help
+ Support using MSI interrupt forwarding instead of (or in addition to)
+ hardware doorbells. MSI interrupts typically offer lower latency
+ than doorbells and more MSI interrupts can be made available to
+ clients. However this requires an extra memory window and support
+ in the hardware driver for creating the MSI interrupts.
+
+ If unsure, say N.
source "drivers/ntb/hw/Kconfig"
source "drivers/ntb/test/Kconfig"
diff --git a/drivers/ntb/Makefile b/drivers/ntb/Makefile
index 5c64438d5b3f..3a6fa181ff99 100644
--- a/drivers/ntb/Makefile
+++ b/drivers/ntb/Makefile
@@ -1,3 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_NTB) += ntb.o hw/ test/
obj-$(CONFIG_NTB_TRANSPORT) += ntb_transport.o
+
+ntb-y := core.o
+ntb-$(CONFIG_NTB_MSI) += msi.o
diff --git a/drivers/ntb/ntb.c b/drivers/ntb/core.c
index 2581ab724c34..2581ab724c34 100644
--- a/drivers/ntb/ntb.c
+++ b/drivers/ntb/core.c
diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c
index efb214fc545a..2859cc99b73e 100644
--- a/drivers/ntb/hw/amd/ntb_hw_amd.c
+++ b/drivers/ntb/hw/amd/ntb_hw_amd.c
@@ -160,8 +160,8 @@ static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx,
}
/* set and verify setting the limit */
- write64(limit, mmio + limit_reg);
- reg_val = read64(mmio + limit_reg);
+ write64(limit, peer_mmio + limit_reg);
+ reg_val = read64(peer_mmio + limit_reg);
if (reg_val != limit) {
write64(base_addr, mmio + limit_reg);
write64(0, peer_mmio + xlat_reg);
@@ -183,8 +183,8 @@ static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx,
}
/* set and verify setting the limit */
- writel(limit, mmio + limit_reg);
- reg_val = readl(mmio + limit_reg);
+ writel(limit, peer_mmio + limit_reg);
+ reg_val = readl(peer_mmio + limit_reg);
if (reg_val != limit) {
writel(base_addr, mmio + limit_reg);
writel(0, peer_mmio + xlat_reg);
@@ -333,7 +333,7 @@ static u64 amd_ntb_db_vector_mask(struct ntb_dev *ntb, int db_vector)
if (db_vector < 0 || db_vector > ndev->db_count)
return 0;
- return ntb_ndev(ntb)->db_valid_mask & (1 << db_vector);
+ return ntb_ndev(ntb)->db_valid_mask & (1ULL << db_vector);
}
static u64 amd_ntb_db_read(struct ntb_dev *ntb)
diff --git a/drivers/ntb/hw/intel/ntb_hw_gen3.c b/drivers/ntb/hw/intel/ntb_hw_gen3.c
index f475b56a3f49..c3397160db7f 100644
--- a/drivers/ntb/hw/intel/ntb_hw_gen3.c
+++ b/drivers/ntb/hw/intel/ntb_hw_gen3.c
@@ -532,9 +532,9 @@ static int intel_ntb3_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx,
return 0;
}
-int intel_ntb3_peer_db_addr(struct ntb_dev *ntb, phys_addr_t *db_addr,
- resource_size_t *db_size,
- u64 *db_data, int db_bit)
+static int intel_ntb3_peer_db_addr(struct ntb_dev *ntb, phys_addr_t *db_addr,
+ resource_size_t *db_size,
+ u64 *db_data, int db_bit)
{
phys_addr_t db_addr_base;
struct intel_ntb_dev *ndev = ntb_ndev(ntb);
diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c
index db4967748e4d..f4959458d909 100644
--- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c
+++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c
@@ -86,7 +86,8 @@ struct switchtec_ntb {
bool link_is_up;
enum ntb_speed link_speed;
enum ntb_width link_width;
- struct work_struct link_reinit_work;
+ struct work_struct check_link_status_work;
+ bool link_force_down;
};
static struct switchtec_ntb *ntb_sndev(struct ntb_dev *ntb)
@@ -485,33 +486,11 @@ enum switchtec_msg {
static int switchtec_ntb_reinit_peer(struct switchtec_ntb *sndev);
-static void link_reinit_work(struct work_struct *work)
-{
- struct switchtec_ntb *sndev;
-
- sndev = container_of(work, struct switchtec_ntb, link_reinit_work);
-
- switchtec_ntb_reinit_peer(sndev);
-}
-
-static void switchtec_ntb_check_link(struct switchtec_ntb *sndev,
- enum switchtec_msg msg)
+static void switchtec_ntb_link_status_update(struct switchtec_ntb *sndev)
{
int link_sta;
int old = sndev->link_is_up;
- if (msg == MSG_LINK_FORCE_DOWN) {
- schedule_work(&sndev->link_reinit_work);
-
- if (sndev->link_is_up) {
- sndev->link_is_up = 0;
- ntb_link_event(&sndev->ntb);
- dev_info(&sndev->stdev->dev, "ntb link forced down\n");
- }
-
- return;
- }
-
link_sta = sndev->self_shared->link_sta;
if (link_sta) {
u64 peer = ioread64(&sndev->peer_shared->magic);
@@ -536,6 +515,38 @@ static void switchtec_ntb_check_link(struct switchtec_ntb *sndev,
}
}
+static void check_link_status_work(struct work_struct *work)
+{
+ struct switchtec_ntb *sndev;
+
+ sndev = container_of(work, struct switchtec_ntb,
+ check_link_status_work);
+
+ if (sndev->link_force_down) {
+ sndev->link_force_down = false;
+ switchtec_ntb_reinit_peer(sndev);
+
+ if (sndev->link_is_up) {
+ sndev->link_is_up = 0;
+ ntb_link_event(&sndev->ntb);
+ dev_info(&sndev->stdev->dev, "ntb link forced down\n");
+ }
+
+ return;
+ }
+
+ switchtec_ntb_link_status_update(sndev);
+}
+
+static void switchtec_ntb_check_link(struct switchtec_ntb *sndev,
+ enum switchtec_msg msg)
+{
+ if (msg == MSG_LINK_FORCE_DOWN)
+ sndev->link_force_down = true;
+
+ schedule_work(&sndev->check_link_status_work);
+}
+
static void switchtec_ntb_link_notification(struct switchtec_dev *stdev)
{
struct switchtec_ntb *sndev = stdev->sndev;
@@ -568,7 +579,7 @@ static int switchtec_ntb_link_enable(struct ntb_dev *ntb,
sndev->self_shared->link_sta = 1;
switchtec_ntb_send_msg(sndev, LINK_MESSAGE, MSG_LINK_UP);
- switchtec_ntb_check_link(sndev, MSG_CHECK_LINK);
+ switchtec_ntb_link_status_update(sndev);
return 0;
}
@@ -582,7 +593,7 @@ static int switchtec_ntb_link_disable(struct ntb_dev *ntb)
sndev->self_shared->link_sta = 0;
switchtec_ntb_send_msg(sndev, LINK_MESSAGE, MSG_LINK_DOWN);
- switchtec_ntb_check_link(sndev, MSG_CHECK_LINK);
+ switchtec_ntb_link_status_update(sndev);
return 0;
}
@@ -835,7 +846,8 @@ static int switchtec_ntb_init_sndev(struct switchtec_ntb *sndev)
sndev->ntb.topo = NTB_TOPO_SWITCH;
sndev->ntb.ops = &switchtec_ntb_ops;
- INIT_WORK(&sndev->link_reinit_work, link_reinit_work);
+ INIT_WORK(&sndev->check_link_status_work, check_link_status_work);
+ sndev->link_force_down = false;
sndev->self_partition = sndev->stdev->partition;
@@ -872,7 +884,7 @@ static int switchtec_ntb_init_sndev(struct switchtec_ntb *sndev)
}
sndev->peer_partition = ffs(tpart_vec) - 1;
- if (!(part_map & (1 << sndev->peer_partition))) {
+ if (!(part_map & (1ULL << sndev->peer_partition))) {
dev_err(&sndev->stdev->dev,
"ntb target partition is not NT partition\n");
return -ENODEV;
@@ -1448,10 +1460,16 @@ static void switchtec_ntb_deinit_db_msg_irq(struct switchtec_ntb *sndev)
static int switchtec_ntb_reinit_peer(struct switchtec_ntb *sndev)
{
- dev_info(&sndev->stdev->dev, "peer reinitialized\n");
- switchtec_ntb_deinit_shared_mw(sndev);
- switchtec_ntb_init_mw(sndev);
- return switchtec_ntb_init_shared_mw(sndev);
+ int rc;
+
+ if (crosslink_is_enabled(sndev))
+ return 0;
+
+ dev_info(&sndev->stdev->dev, "reinitialize shared memory window\n");
+ rc = config_rsvd_lut_win(sndev, sndev->mmio_peer_ctrl, 0,
+ sndev->self_partition,
+ sndev->self_shared_dma);
+ return rc;
}
static int switchtec_ntb_add(struct device *dev,
diff --git a/drivers/ntb/msi.c b/drivers/ntb/msi.c
new file mode 100644
index 000000000000..9dddf133658f
--- /dev/null
+++ b/drivers/ntb/msi.c
@@ -0,0 +1,415 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/ntb.h>
+#include <linux/msi.h>
+#include <linux/pci.h>
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION("0.1");
+MODULE_AUTHOR("Logan Gunthorpe <logang@deltatee.com>");
+MODULE_DESCRIPTION("NTB MSI Interrupt Library");
+
+struct ntb_msi {
+ u64 base_addr;
+ u64 end_addr;
+
+ void (*desc_changed)(void *ctx);
+
+ u32 __iomem *peer_mws[];
+};
+
+/**
+ * ntb_msi_init() - Initialize the MSI context
+ * @ntb: NTB device context
+ *
+ * This function must be called before any other ntb_msi function.
+ * It initializes the context for MSI operations and maps
+ * the peer memory windows.
+ *
+ * This function reserves the last N outbound memory windows (where N
+ * is the number of peers).
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+int ntb_msi_init(struct ntb_dev *ntb,
+ void (*desc_changed)(void *ctx))
+{
+ phys_addr_t mw_phys_addr;
+ resource_size_t mw_size;
+ size_t struct_size;
+ int peer_widx;
+ int peers;
+ int ret;
+ int i;
+
+ peers = ntb_peer_port_count(ntb);
+ if (peers <= 0)
+ return -EINVAL;
+
+ struct_size = sizeof(*ntb->msi) + sizeof(*ntb->msi->peer_mws) * peers;
+
+ ntb->msi = devm_kzalloc(&ntb->dev, struct_size, GFP_KERNEL);
+ if (!ntb->msi)
+ return -ENOMEM;
+
+ ntb->msi->desc_changed = desc_changed;
+
+ for (i = 0; i < peers; i++) {
+ peer_widx = ntb_peer_mw_count(ntb) - 1 - i;
+
+ ret = ntb_peer_mw_get_addr(ntb, peer_widx, &mw_phys_addr,
+ &mw_size);
+ if (ret)
+ goto unroll;
+
+ ntb->msi->peer_mws[i] = devm_ioremap(&ntb->dev, mw_phys_addr,
+ mw_size);
+ if (!ntb->msi->peer_mws[i]) {
+ ret = -EFAULT;
+ goto unroll;
+ }
+ }
+
+ return 0;
+
+unroll:
+ for (i = 0; i < peers; i++)
+ if (ntb->msi->peer_mws[i])
+ devm_iounmap(&ntb->dev, ntb->msi->peer_mws[i]);
+
+ devm_kfree(&ntb->dev, ntb->msi);
+ ntb->msi = NULL;
+ return ret;
+}
+EXPORT_SYMBOL(ntb_msi_init);
+
+/**
+ * ntb_msi_setup_mws() - Initialize the MSI inbound memory windows
+ * @ntb: NTB device context
+ *
+ * This function sets up the required inbound memory windows. It should be
+ * called from a work function after a link up event.
+ *
+ * Over the entire network, this function will reserves the last N
+ * inbound memory windows for each peer (where N is the number of peers).
+ *
+ * ntb_msi_init() must be called before this function.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+int ntb_msi_setup_mws(struct ntb_dev *ntb)
+{
+ struct msi_desc *desc;
+ u64 addr;
+ int peer, peer_widx;
+ resource_size_t addr_align, size_align, size_max;
+ resource_size_t mw_size = SZ_32K;
+ resource_size_t mw_min_size = mw_size;
+ int i;
+ int ret;
+
+ if (!ntb->msi)
+ return -EINVAL;
+
+ desc = first_msi_entry(&ntb->pdev->dev);
+ addr = desc->msg.address_lo + ((uint64_t)desc->msg.address_hi << 32);
+
+ for (peer = 0; peer < ntb_peer_port_count(ntb); peer++) {
+ peer_widx = ntb_peer_highest_mw_idx(ntb, peer);
+ if (peer_widx < 0)
+ return peer_widx;
+
+ ret = ntb_mw_get_align(ntb, peer, peer_widx, &addr_align,
+ NULL, NULL);
+ if (ret)
+ return ret;
+
+ addr &= ~(addr_align - 1);
+ }
+
+ for (peer = 0; peer < ntb_peer_port_count(ntb); peer++) {
+ peer_widx = ntb_peer_highest_mw_idx(ntb, peer);
+ if (peer_widx < 0) {
+ ret = peer_widx;
+ goto error_out;
+ }
+
+ ret = ntb_mw_get_align(ntb, peer, peer_widx, NULL,
+ &size_align, &size_max);
+ if (ret)
+ goto error_out;
+
+ mw_size = round_up(mw_size, size_align);
+ mw_size = max(mw_size, size_max);
+ if (mw_size < mw_min_size)
+ mw_min_size = mw_size;
+
+ ret = ntb_mw_set_trans(ntb, peer, peer_widx,
+ addr, mw_size);
+ if (ret)
+ goto error_out;
+ }
+
+ ntb->msi->base_addr = addr;
+ ntb->msi->end_addr = addr + mw_min_size;
+
+ return 0;
+
+error_out:
+ for (i = 0; i < peer; i++) {
+ peer_widx = ntb_peer_highest_mw_idx(ntb, peer);
+ if (peer_widx < 0)
+ continue;
+
+ ntb_mw_clear_trans(ntb, i, peer_widx);
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(ntb_msi_setup_mws);
+
+/**
+ * ntb_msi_clear_mws() - Clear all inbound memory windows
+ * @ntb: NTB device context
+ *
+ * This function tears down the resources used by ntb_msi_setup_mws().
+ */
+void ntb_msi_clear_mws(struct ntb_dev *ntb)
+{
+ int peer;
+ int peer_widx;
+
+ for (peer = 0; peer < ntb_peer_port_count(ntb); peer++) {
+ peer_widx = ntb_peer_highest_mw_idx(ntb, peer);
+ if (peer_widx < 0)
+ continue;
+
+ ntb_mw_clear_trans(ntb, peer, peer_widx);
+ }
+}
+EXPORT_SYMBOL(ntb_msi_clear_mws);
+
+struct ntb_msi_devres {
+ struct ntb_dev *ntb;
+ struct msi_desc *entry;
+ struct ntb_msi_desc *msi_desc;
+};
+
+static int ntb_msi_set_desc(struct ntb_dev *ntb, struct msi_desc *entry,
+ struct ntb_msi_desc *msi_desc)
+{
+ u64 addr;
+
+ addr = entry->msg.address_lo +
+ ((uint64_t)entry->msg.address_hi << 32);
+
+ if (addr < ntb->msi->base_addr || addr >= ntb->msi->end_addr) {
+ dev_warn_once(&ntb->dev,
+ "IRQ %d: MSI Address not within the memory window (%llx, [%llx %llx])\n",
+ entry->irq, addr, ntb->msi->base_addr,
+ ntb->msi->end_addr);
+ return -EFAULT;
+ }
+
+ msi_desc->addr_offset = addr - ntb->msi->base_addr;
+ msi_desc->data = entry->msg.data;
+
+ return 0;
+}
+
+static void ntb_msi_write_msg(struct msi_desc *entry, void *data)
+{
+ struct ntb_msi_devres *dr = data;
+
+ WARN_ON(ntb_msi_set_desc(dr->ntb, entry, dr->msi_desc));
+
+ if (dr->ntb->msi->desc_changed)
+ dr->ntb->msi->desc_changed(dr->ntb->ctx);
+}
+
+static void ntbm_msi_callback_release(struct device *dev, void *res)
+{
+ struct ntb_msi_devres *dr = res;
+
+ dr->entry->write_msi_msg = NULL;
+ dr->entry->write_msi_msg_data = NULL;
+}
+
+static int ntbm_msi_setup_callback(struct ntb_dev *ntb, struct msi_desc *entry,
+ struct ntb_msi_desc *msi_desc)
+{
+ struct ntb_msi_devres *dr;
+
+ dr = devres_alloc(ntbm_msi_callback_release,
+ sizeof(struct ntb_msi_devres), GFP_KERNEL);
+ if (!dr)
+ return -ENOMEM;
+
+ dr->ntb = ntb;
+ dr->entry = entry;
+ dr->msi_desc = msi_desc;
+
+ devres_add(&ntb->dev, dr);
+
+ dr->entry->write_msi_msg = ntb_msi_write_msg;
+ dr->entry->write_msi_msg_data = dr;
+
+ return 0;
+}
+
+/**
+ * ntbm_msi_request_threaded_irq() - allocate an MSI interrupt
+ * @ntb: NTB device context
+ * @handler: Function to be called when the IRQ occurs
+ * @thread_fn: Function to be called in a threaded interrupt context. NULL
+ * for clients which handle everything in @handler
+ * @devname: An ascii name for the claiming device, dev_name(dev) if NULL
+ * @dev_id: A cookie passed back to the handler function
+ *
+ * This function assigns an interrupt handler to an unused
+ * MSI interrupt and returns the descriptor used to trigger
+ * it. The descriptor can then be sent to a peer to trigger
+ * the interrupt.
+ *
+ * The interrupt resource is managed with devres so it will
+ * be automatically freed when the NTB device is torn down.
+ *
+ * If an IRQ allocated with this function needs to be freed
+ * separately, ntbm_free_irq() must be used.
+ *
+ * Return: IRQ number assigned on success, otherwise a negative error number.
+ */
+int ntbm_msi_request_threaded_irq(struct ntb_dev *ntb, irq_handler_t handler,
+ irq_handler_t thread_fn,
+ const char *name, void *dev_id,
+ struct ntb_msi_desc *msi_desc)
+{
+ struct msi_desc *entry;
+ struct irq_desc *desc;
+ int ret;
+
+ if (!ntb->msi)
+ return -EINVAL;
+
+ for_each_pci_msi_entry(entry, ntb->pdev) {
+ desc = irq_to_desc(entry->irq);
+ if (desc->action)
+ continue;
+
+ ret = devm_request_threaded_irq(&ntb->dev, entry->irq, handler,
+ thread_fn, 0, name, dev_id);
+ if (ret)
+ continue;
+
+ if (ntb_msi_set_desc(ntb, entry, msi_desc)) {
+ devm_free_irq(&ntb->dev, entry->irq, dev_id);
+ continue;
+ }
+
+ ret = ntbm_msi_setup_callback(ntb, entry, msi_desc);
+ if (ret) {
+ devm_free_irq(&ntb->dev, entry->irq, dev_id);
+ return ret;
+ }
+
+
+ return entry->irq;
+ }
+
+ return -ENODEV;
+}
+EXPORT_SYMBOL(ntbm_msi_request_threaded_irq);
+
+static int ntbm_msi_callback_match(struct device *dev, void *res, void *data)
+{
+ struct ntb_dev *ntb = dev_ntb(dev);
+ struct ntb_msi_devres *dr = res;
+
+ return dr->ntb == ntb && dr->entry == data;
+}
+
+/**
+ * ntbm_msi_free_irq() - free an interrupt
+ * @ntb: NTB device context
+ * @irq: Interrupt line to free
+ * @dev_id: Device identity to free
+ *
+ * This function should be used to manually free IRQs allocated with
+ * ntbm_request_[threaded_]irq().
+ */
+void ntbm_msi_free_irq(struct ntb_dev *ntb, unsigned int irq, void *dev_id)
+{
+ struct msi_desc *entry = irq_get_msi_desc(irq);
+
+ entry->write_msi_msg = NULL;
+ entry->write_msi_msg_data = NULL;
+
+ WARN_ON(devres_destroy(&ntb->dev, ntbm_msi_callback_release,
+ ntbm_msi_callback_match, entry));
+
+ devm_free_irq(&ntb->dev, irq, dev_id);
+}
+EXPORT_SYMBOL(ntbm_msi_free_irq);
+
+/**
+ * ntb_msi_peer_trigger() - Trigger an interrupt handler on a peer
+ * @ntb: NTB device context
+ * @peer: Peer index
+ * @desc: MSI descriptor data which triggers the interrupt
+ *
+ * This function triggers an interrupt on a peer. It requires
+ * the descriptor structure to have been passed from that peer
+ * by some other means.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+int ntb_msi_peer_trigger(struct ntb_dev *ntb, int peer,
+ struct ntb_msi_desc *desc)
+{
+ int idx;
+
+ if (!ntb->msi)
+ return -EINVAL;
+
+ idx = desc->addr_offset / sizeof(*ntb->msi->peer_mws[peer]);
+
+ iowrite32(desc->data, &ntb->msi->peer_mws[peer][idx]);
+
+ return 0;
+}
+EXPORT_SYMBOL(ntb_msi_peer_trigger);
+
+/**
+ * ntb_msi_peer_addr() - Get the DMA address to trigger a peer's MSI interrupt
+ * @ntb: NTB device context
+ * @peer: Peer index
+ * @desc: MSI descriptor data which triggers the interrupt
+ * @msi_addr: Physical address to trigger the interrupt
+ *
+ * This function allows using DMA engines to trigger an interrupt
+ * (for example, trigger an interrupt to process the data after
+ * sending it). To trigger the interrupt, write @desc.data to the address
+ * returned in @msi_addr
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+int ntb_msi_peer_addr(struct ntb_dev *ntb, int peer,
+ struct ntb_msi_desc *desc,
+ phys_addr_t *msi_addr)
+{
+ int peer_widx = ntb_peer_mw_count(ntb) - 1 - peer;
+ phys_addr_t mw_phys_addr;
+ int ret;
+
+ ret = ntb_peer_mw_get_addr(ntb, peer_widx, &mw_phys_addr, NULL);
+ if (ret)
+ return ret;
+
+ if (msi_addr)
+ *msi_addr = mw_phys_addr + desc->addr_offset;
+
+ return 0;
+}
+EXPORT_SYMBOL(ntb_msi_peer_addr);
diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index d4f39ba1d976..40c90ca10729 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -93,6 +93,12 @@ static bool use_dma;
module_param(use_dma, bool, 0644);
MODULE_PARM_DESC(use_dma, "Use DMA engine to perform large data copy");
+static bool use_msi;
+#ifdef CONFIG_NTB_MSI
+module_param(use_msi, bool, 0644);
+MODULE_PARM_DESC(use_msi, "Use MSI interrupts instead of doorbells");
+#endif
+
static struct dentry *nt_debugfs_dir;
/* Only two-ports NTB devices are supported */
@@ -188,6 +194,11 @@ struct ntb_transport_qp {
u64 tx_err_no_buf;
u64 tx_memcpy;
u64 tx_async;
+
+ bool use_msi;
+ int msi_irq;
+ struct ntb_msi_desc msi_desc;
+ struct ntb_msi_desc peer_msi_desc;
};
struct ntb_transport_mw {
@@ -221,6 +232,10 @@ struct ntb_transport_ctx {
u64 qp_bitmap;
u64 qp_bitmap_free;
+ bool use_msi;
+ unsigned int msi_spad_offset;
+ u64 msi_db_mask;
+
bool link_is_up;
struct delayed_work link_work;
struct work_struct link_cleanup;
@@ -667,6 +682,114 @@ static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt,
return 0;
}
+static irqreturn_t ntb_transport_isr(int irq, void *dev)
+{
+ struct ntb_transport_qp *qp = dev;
+
+ tasklet_schedule(&qp->rxc_db_work);
+
+ return IRQ_HANDLED;
+}
+
+static void ntb_transport_setup_qp_peer_msi(struct ntb_transport_ctx *nt,
+ unsigned int qp_num)
+{
+ struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
+ int spad = qp_num * 2 + nt->msi_spad_offset;
+
+ if (!nt->use_msi)
+ return;
+
+ if (spad >= ntb_spad_count(nt->ndev))
+ return;
+
+ qp->peer_msi_desc.addr_offset =
+ ntb_peer_spad_read(qp->ndev, PIDX, spad);
+ qp->peer_msi_desc.data =
+ ntb_peer_spad_read(qp->ndev, PIDX, spad + 1);
+
+ dev_dbg(&qp->ndev->pdev->dev, "QP%d Peer MSI addr=%x data=%x\n",
+ qp_num, qp->peer_msi_desc.addr_offset, qp->peer_msi_desc.data);
+
+ if (qp->peer_msi_desc.addr_offset) {
+ qp->use_msi = true;
+ dev_info(&qp->ndev->pdev->dev,
+ "Using MSI interrupts for QP%d\n", qp_num);
+ }
+}
+
+static void ntb_transport_setup_qp_msi(struct ntb_transport_ctx *nt,
+ unsigned int qp_num)
+{
+ struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
+ int spad = qp_num * 2 + nt->msi_spad_offset;
+ int rc;
+
+ if (!nt->use_msi)
+ return;
+
+ if (spad >= ntb_spad_count(nt->ndev)) {
+ dev_warn_once(&qp->ndev->pdev->dev,
+ "Not enough SPADS to use MSI interrupts\n");
+ return;
+ }
+
+ ntb_spad_write(qp->ndev, spad, 0);
+ ntb_spad_write(qp->ndev, spad + 1, 0);
+
+ if (!qp->msi_irq) {
+ qp->msi_irq = ntbm_msi_request_irq(qp->ndev, ntb_transport_isr,
+ KBUILD_MODNAME, qp,
+ &qp->msi_desc);
+ if (qp->msi_irq < 0) {
+ dev_warn(&qp->ndev->pdev->dev,
+ "Unable to allocate MSI interrupt for qp%d\n",
+ qp_num);
+ return;
+ }
+ }
+
+ rc = ntb_spad_write(qp->ndev, spad, qp->msi_desc.addr_offset);
+ if (rc)
+ goto err_free_interrupt;
+
+ rc = ntb_spad_write(qp->ndev, spad + 1, qp->msi_desc.data);
+ if (rc)
+ goto err_free_interrupt;
+
+ dev_dbg(&qp->ndev->pdev->dev, "QP%d MSI %d addr=%x data=%x\n",
+ qp_num, qp->msi_irq, qp->msi_desc.addr_offset,
+ qp->msi_desc.data);
+
+ return;
+
+err_free_interrupt:
+ devm_free_irq(&nt->ndev->dev, qp->msi_irq, qp);
+}
+
+static void ntb_transport_msi_peer_desc_changed(struct ntb_transport_ctx *nt)
+{
+ int i;
+
+ dev_dbg(&nt->ndev->pdev->dev, "Peer MSI descriptors changed");
+
+ for (i = 0; i < nt->qp_count; i++)
+ ntb_transport_setup_qp_peer_msi(nt, i);
+}
+
+static void ntb_transport_msi_desc_changed(void *data)
+{
+ struct ntb_transport_ctx *nt = data;
+ int i;
+
+ dev_dbg(&nt->ndev->pdev->dev, "MSI descriptors changed");
+
+ for (i = 0; i < nt->qp_count; i++)
+ ntb_transport_setup_qp_msi(nt, i);
+
+ ntb_peer_db_set(nt->ndev, nt->msi_db_mask);
+}
+
static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw)
{
struct ntb_transport_mw *mw = &nt->mw_vec[num_mw];
@@ -905,6 +1028,20 @@ static void ntb_transport_link_work(struct work_struct *work)
int rc = 0, i, spad;
/* send the local info, in the opposite order of the way we read it */
+
+ if (nt->use_msi) {
+ rc = ntb_msi_setup_mws(ndev);
+ if (rc) {
+ dev_warn(&pdev->dev,
+ "Failed to register MSI memory window: %d\n",
+ rc);
+ nt->use_msi = false;
+ }
+ }
+
+ for (i = 0; i < nt->qp_count; i++)
+ ntb_transport_setup_qp_msi(nt, i);
+
for (i = 0; i < nt->mw_count; i++) {
size = nt->mw_vec[i].phys_size;
@@ -962,6 +1099,7 @@ static void ntb_transport_link_work(struct work_struct *work)
struct ntb_transport_qp *qp = &nt->qp_vec[i];
ntb_transport_setup_qp_mw(nt, i);
+ ntb_transport_setup_qp_peer_msi(nt, i);
if (qp->client_ready)
schedule_delayed_work(&qp->link_work, 0);
@@ -1135,6 +1273,19 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
return -ENOMEM;
nt->ndev = ndev;
+
+ /*
+ * If we are using MSI, and have at least one extra memory window,
+ * we will reserve the last MW for the MSI window.
+ */
+ if (use_msi && mw_count > 1) {
+ rc = ntb_msi_init(ndev, ntb_transport_msi_desc_changed);
+ if (!rc) {
+ mw_count -= 1;
+ nt->use_msi = true;
+ }
+ }
+
spad_count = ntb_spad_count(ndev);
/* Limit the MW's based on the availability of scratchpads */
@@ -1148,6 +1299,8 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
max_mw_count_for_spads = (spad_count - MW0_SZ_HIGH) / 2;
nt->mw_count = min(mw_count, max_mw_count_for_spads);
+ nt->msi_spad_offset = nt->mw_count * 2 + MW0_SZ_HIGH;
+
nt->mw_vec = kcalloc_node(mw_count, sizeof(*nt->mw_vec),
GFP_KERNEL, node);
if (!nt->mw_vec) {
@@ -1178,6 +1331,12 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
qp_bitmap = ntb_db_valid_mask(ndev);
qp_count = ilog2(qp_bitmap);
+ if (nt->use_msi) {
+ qp_count -= 1;
+ nt->msi_db_mask = 1 << qp_count;
+ ntb_db_clear_mask(ndev, nt->msi_db_mask);
+ }
+
if (max_num_clients && max_num_clients < qp_count)
qp_count = max_num_clients;
else if (nt->mw_count < qp_count)
@@ -1601,7 +1760,10 @@ static void ntb_tx_copy_callback(void *data,
iowrite32(entry->flags | DESC_DONE_FLAG, &hdr->flags);
- ntb_peer_db_set(qp->ndev, BIT_ULL(qp->qp_num));
+ if (qp->use_msi)
+ ntb_msi_peer_trigger(qp->ndev, PIDX, &qp->peer_msi_desc);
+ else
+ ntb_peer_db_set(qp->ndev, BIT_ULL(qp->qp_num));
/* The entry length can only be zero if the packet is intended to be a
* "link down" or similar. Since no payload is being sent in these
@@ -1869,6 +2031,7 @@ ntb_transport_create_queue(void *data, struct device *client_dev,
qp->rx_dma_chan = NULL;
}
+ qp->tx_mw_dma_addr = 0;
if (qp->tx_dma_chan) {
qp->tx_mw_dma_addr =
dma_map_resource(qp->tx_dma_chan->device->dev,
@@ -2268,6 +2431,11 @@ static void ntb_transport_doorbell_callback(void *data, int vector)
u64 db_bits;
unsigned int qp_num;
+ if (ntb_db_read(nt->ndev) & nt->msi_db_mask) {
+ ntb_transport_msi_peer_desc_changed(nt);
+ ntb_db_clear(nt->ndev, nt->msi_db_mask);
+ }
+
db_bits = (nt->qp_bitmap & ~nt->qp_bitmap_free &
ntb_db_vector_mask(nt->ndev, vector));
diff --git a/drivers/ntb/test/Kconfig b/drivers/ntb/test/Kconfig
index a8db00a7e087..516b991f33b9 100644
--- a/drivers/ntb/test/Kconfig
+++ b/drivers/ntb/test/Kconfig
@@ -26,3 +26,12 @@ config NTB_PERF
to and from the window without additional software interaction.
If unsure, say N.
+
+config NTB_MSI_TEST
+ tristate "NTB MSI Test Client"
+ depends on NTB_MSI
+ help
+ This tool demonstrates the use of the NTB MSI library to
+ send MSI interrupts between peers.
+
+ If unsure, say N.
diff --git a/drivers/ntb/test/Makefile b/drivers/ntb/test/Makefile
index cbfd67622ef7..19ed91d8a3b1 100644
--- a/drivers/ntb/test/Makefile
+++ b/drivers/ntb/test/Makefile
@@ -2,3 +2,4 @@
obj-$(CONFIG_NTB_PINGPONG) += ntb_pingpong.o
obj-$(CONFIG_NTB_TOOL) += ntb_tool.o
obj-$(CONFIG_NTB_PERF) += ntb_perf.o
+obj-$(CONFIG_NTB_MSI_TEST) += ntb_msi_test.o
diff --git a/drivers/ntb/test/ntb_msi_test.c b/drivers/ntb/test/ntb_msi_test.c
new file mode 100644
index 000000000000..99d826ed9c34
--- /dev/null
+++ b/drivers/ntb/test/ntb_msi_test.c
@@ -0,0 +1,433 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/ntb.h>
+#include <linux/pci.h>
+#include <linux/radix-tree.h>
+#include <linux/workqueue.h>
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION("0.1");
+MODULE_AUTHOR("Logan Gunthorpe <logang@deltatee.com>");
+MODULE_DESCRIPTION("Test for sending MSI interrupts over an NTB memory window");
+
+static int num_irqs = 4;
+module_param(num_irqs, int, 0644);
+MODULE_PARM_DESC(num_irqs, "number of irqs to use");
+
+struct ntb_msit_ctx {
+ struct ntb_dev *ntb;
+ struct dentry *dbgfs_dir;
+ struct work_struct setup_work;
+
+ struct ntb_msit_isr_ctx {
+ int irq_idx;
+ int irq_num;
+ int occurrences;
+ struct ntb_msit_ctx *nm;
+ struct ntb_msi_desc desc;
+ } *isr_ctx;
+
+ struct ntb_msit_peer {
+ struct ntb_msit_ctx *nm;
+ int pidx;
+ int num_irqs;
+ struct completion init_comp;
+ struct ntb_msi_desc *msi_desc;
+ } peers[];
+};
+
+static struct dentry *ntb_msit_dbgfs_topdir;
+
+static irqreturn_t ntb_msit_isr(int irq, void *dev)
+{
+ struct ntb_msit_isr_ctx *isr_ctx = dev;
+ struct ntb_msit_ctx *nm = isr_ctx->nm;
+
+ dev_dbg(&nm->ntb->dev, "Interrupt Occurred: %d",
+ isr_ctx->irq_idx);
+
+ isr_ctx->occurrences++;
+
+ return IRQ_HANDLED;
+}
+
+static void ntb_msit_setup_work(struct work_struct *work)
+{
+ struct ntb_msit_ctx *nm = container_of(work, struct ntb_msit_ctx,
+ setup_work);
+ int irq_count = 0;
+ int irq;
+ int ret;
+ uintptr_t i;
+
+ ret = ntb_msi_setup_mws(nm->ntb);
+ if (ret) {
+ dev_err(&nm->ntb->dev, "Unable to setup MSI windows: %d\n",
+ ret);
+ return;
+ }
+
+ for (i = 0; i < num_irqs; i++) {
+ nm->isr_ctx[i].irq_idx = i;
+ nm->isr_ctx[i].nm = nm;
+
+ if (!nm->isr_ctx[i].irq_num) {
+ irq = ntbm_msi_request_irq(nm->ntb, ntb_msit_isr,
+ KBUILD_MODNAME,
+ &nm->isr_ctx[i],
+ &nm->isr_ctx[i].desc);
+ if (irq < 0)
+ break;
+
+ nm->isr_ctx[i].irq_num = irq;
+ }
+
+ ret = ntb_spad_write(nm->ntb, 2 * i + 1,
+ nm->isr_ctx[i].desc.addr_offset);
+ if (ret)
+ break;
+
+ ret = ntb_spad_write(nm->ntb, 2 * i + 2,
+ nm->isr_ctx[i].desc.data);
+ if (ret)
+ break;
+
+ irq_count++;
+ }
+
+ ntb_spad_write(nm->ntb, 0, irq_count);
+ ntb_peer_db_set(nm->ntb, BIT(ntb_port_number(nm->ntb)));
+}
+
+static void ntb_msit_desc_changed(void *ctx)
+{
+ struct ntb_msit_ctx *nm = ctx;
+ int i;
+
+ dev_dbg(&nm->ntb->dev, "MSI Descriptors Changed\n");
+
+ for (i = 0; i < num_irqs; i++) {
+ ntb_spad_write(nm->ntb, 2 * i + 1,
+ nm->isr_ctx[i].desc.addr_offset);
+ ntb_spad_write(nm->ntb, 2 * i + 2,
+ nm->isr_ctx[i].desc.data);
+ }
+
+ ntb_peer_db_set(nm->ntb, BIT(ntb_port_number(nm->ntb)));
+}
+
+static void ntb_msit_link_event(void *ctx)
+{
+ struct ntb_msit_ctx *nm = ctx;
+
+ if (!ntb_link_is_up(nm->ntb, NULL, NULL))
+ return;
+
+ schedule_work(&nm->setup_work);
+}
+
+static void ntb_msit_copy_peer_desc(struct ntb_msit_ctx *nm, int peer)
+{
+ int i;
+ struct ntb_msi_desc *desc = nm->peers[peer].msi_desc;
+ int irq_count = nm->peers[peer].num_irqs;
+
+ for (i = 0; i < irq_count; i++) {
+ desc[i].addr_offset = ntb_peer_spad_read(nm->ntb, peer,
+ 2 * i + 1);
+ desc[i].data = ntb_peer_spad_read(nm->ntb, peer, 2 * i + 2);
+ }
+
+ dev_info(&nm->ntb->dev, "Found %d interrupts on peer %d\n",
+ irq_count, peer);
+
+ complete_all(&nm->peers[peer].init_comp);
+}
+
+static void ntb_msit_db_event(void *ctx, int vec)
+{
+ struct ntb_msit_ctx *nm = ctx;
+ struct ntb_msi_desc *desc;
+ u64 peer_mask = ntb_db_read(nm->ntb);
+ u32 irq_count;
+ int peer;
+
+ ntb_db_clear(nm->ntb, peer_mask);
+
+ for (peer = 0; peer < sizeof(peer_mask) * 8; peer++) {
+ if (!(peer_mask & BIT(peer)))
+ continue;
+
+ irq_count = ntb_peer_spad_read(nm->ntb, peer, 0);
+ if (irq_count == -1)
+ continue;
+
+ desc = kcalloc(irq_count, sizeof(*desc), GFP_ATOMIC);
+ if (!desc)
+ continue;
+
+ kfree(nm->peers[peer].msi_desc);
+ nm->peers[peer].msi_desc = desc;
+ nm->peers[peer].num_irqs = irq_count;
+
+ ntb_msit_copy_peer_desc(nm, peer);
+ }
+}
+
+static const struct ntb_ctx_ops ntb_msit_ops = {
+ .link_event = ntb_msit_link_event,
+ .db_event = ntb_msit_db_event,
+};
+
+static int ntb_msit_dbgfs_trigger(void *data, u64 idx)
+{
+ struct ntb_msit_peer *peer = data;
+
+ if (idx >= peer->num_irqs)
+ return -EINVAL;
+
+ dev_dbg(&peer->nm->ntb->dev, "trigger irq %llu on peer %u\n",
+ idx, peer->pidx);
+
+ return ntb_msi_peer_trigger(peer->nm->ntb, peer->pidx,
+ &peer->msi_desc[idx]);
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(ntb_msit_trigger_fops, NULL,
+ ntb_msit_dbgfs_trigger, "%llu\n");
+
+static int ntb_msit_dbgfs_port_get(void *data, u64 *port)
+{
+ struct ntb_msit_peer *peer = data;
+
+ *port = ntb_peer_port_number(peer->nm->ntb, peer->pidx);
+
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(ntb_msit_port_fops, ntb_msit_dbgfs_port_get,
+ NULL, "%llu\n");
+
+static int ntb_msit_dbgfs_count_get(void *data, u64 *count)
+{
+ struct ntb_msit_peer *peer = data;
+
+ *count = peer->num_irqs;
+
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(ntb_msit_count_fops, ntb_msit_dbgfs_count_get,
+ NULL, "%llu\n");
+
+static int ntb_msit_dbgfs_ready_get(void *data, u64 *ready)
+{
+ struct ntb_msit_peer *peer = data;
+
+ *ready = try_wait_for_completion(&peer->init_comp);
+
+ return 0;
+}
+
+static int ntb_msit_dbgfs_ready_set(void *data, u64 ready)
+{
+ struct ntb_msit_peer *peer = data;
+
+ return wait_for_completion_interruptible(&peer->init_comp);
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(ntb_msit_ready_fops, ntb_msit_dbgfs_ready_get,
+ ntb_msit_dbgfs_ready_set, "%llu\n");
+
+static int ntb_msit_dbgfs_occurrences_get(void *data, u64 *occurrences)
+{
+ struct ntb_msit_isr_ctx *isr_ctx = data;
+
+ *occurrences = isr_ctx->occurrences;
+
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(ntb_msit_occurrences_fops,
+ ntb_msit_dbgfs_occurrences_get,
+ NULL, "%llu\n");
+
+static int ntb_msit_dbgfs_local_port_get(void *data, u64 *port)
+{
+ struct ntb_msit_ctx *nm = data;
+
+ *port = ntb_port_number(nm->ntb);
+
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(ntb_msit_local_port_fops,
+ ntb_msit_dbgfs_local_port_get,
+ NULL, "%llu\n");
+
+static void ntb_msit_create_dbgfs(struct ntb_msit_ctx *nm)
+{
+ struct pci_dev *pdev = nm->ntb->pdev;
+ char buf[32];
+ int i;
+ struct dentry *peer_dir;
+
+ nm->dbgfs_dir = debugfs_create_dir(pci_name(pdev),
+ ntb_msit_dbgfs_topdir);
+ debugfs_create_file("port", 0400, nm->dbgfs_dir, nm,
+ &ntb_msit_local_port_fops);
+
+ for (i = 0; i < ntb_peer_port_count(nm->ntb); i++) {
+ nm->peers[i].pidx = i;
+ nm->peers[i].nm = nm;
+ init_completion(&nm->peers[i].init_comp);
+
+ snprintf(buf, sizeof(buf), "peer%d", i);
+ peer_dir = debugfs_create_dir(buf, nm->dbgfs_dir);
+
+ debugfs_create_file_unsafe("trigger", 0200, peer_dir,
+ &nm->peers[i],
+ &ntb_msit_trigger_fops);
+
+ debugfs_create_file_unsafe("port", 0400, peer_dir,
+ &nm->peers[i], &ntb_msit_port_fops);
+
+ debugfs_create_file_unsafe("count", 0400, peer_dir,
+ &nm->peers[i],
+ &ntb_msit_count_fops);
+
+ debugfs_create_file_unsafe("ready", 0600, peer_dir,
+ &nm->peers[i],
+ &ntb_msit_ready_fops);
+ }
+
+ for (i = 0; i < num_irqs; i++) {
+ snprintf(buf, sizeof(buf), "irq%d_occurrences", i);
+ debugfs_create_file_unsafe(buf, 0400, nm->dbgfs_dir,
+ &nm->isr_ctx[i],
+ &ntb_msit_occurrences_fops);
+ }
+}
+
+static void ntb_msit_remove_dbgfs(struct ntb_msit_ctx *nm)
+{
+ debugfs_remove_recursive(nm->dbgfs_dir);
+}
+
+static int ntb_msit_probe(struct ntb_client *client, struct ntb_dev *ntb)
+{
+ struct ntb_msit_ctx *nm;
+ size_t struct_size;
+ int peers;
+ int ret;
+
+ peers = ntb_peer_port_count(ntb);
+ if (peers <= 0)
+ return -EINVAL;
+
+ if (ntb_spad_is_unsafe(ntb) || ntb_spad_count(ntb) < 2 * num_irqs + 1) {
+ dev_err(&ntb->dev, "NTB MSI test requires at least %d spads for %d irqs\n",
+ 2 * num_irqs + 1, num_irqs);
+ return -EFAULT;
+ }
+
+ ret = ntb_spad_write(ntb, 0, -1);
+ if (ret) {
+ dev_err(&ntb->dev, "Unable to write spads: %d\n", ret);
+ return ret;
+ }
+
+ ret = ntb_db_clear_mask(ntb, GENMASK(peers - 1, 0));
+ if (ret) {
+ dev_err(&ntb->dev, "Unable to clear doorbell mask: %d\n", ret);
+ return ret;
+ }
+
+ ret = ntb_msi_init(ntb, ntb_msit_desc_changed);
+ if (ret) {
+ dev_err(&ntb->dev, "Unable to initialize MSI library: %d\n",
+ ret);
+ return ret;
+ }
+
+ struct_size = sizeof(*nm) + sizeof(*nm->peers) * peers;
+
+ nm = devm_kzalloc(&ntb->dev, struct_size, GFP_KERNEL);
+ if (!nm)
+ return -ENOMEM;
+
+ nm->isr_ctx = devm_kcalloc(&ntb->dev, num_irqs, sizeof(*nm->isr_ctx),
+ GFP_KERNEL);
+ if (!nm->isr_ctx)
+ return -ENOMEM;
+
+ INIT_WORK(&nm->setup_work, ntb_msit_setup_work);
+ nm->ntb = ntb;
+
+ ntb_msit_create_dbgfs(nm);
+
+ ret = ntb_set_ctx(ntb, nm, &ntb_msit_ops);
+ if (ret)
+ goto remove_dbgfs;
+
+ if (!nm->isr_ctx)
+ goto remove_dbgfs;
+
+ ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
+
+ return 0;
+
+remove_dbgfs:
+ ntb_msit_remove_dbgfs(nm);
+ devm_kfree(&ntb->dev, nm->isr_ctx);
+ devm_kfree(&ntb->dev, nm);
+ return ret;
+}
+
+static void ntb_msit_remove(struct ntb_client *client, struct ntb_dev *ntb)
+{
+ struct ntb_msit_ctx *nm = ntb->ctx;
+ int i;
+
+ ntb_link_disable(ntb);
+ ntb_db_set_mask(ntb, ntb_db_valid_mask(ntb));
+ ntb_msi_clear_mws(ntb);
+
+ for (i = 0; i < ntb_peer_port_count(ntb); i++)
+ kfree(nm->peers[i].msi_desc);
+
+ ntb_clear_ctx(ntb);
+ ntb_msit_remove_dbgfs(nm);
+}
+
+static struct ntb_client ntb_msit_client = {
+ .ops = {
+ .probe = ntb_msit_probe,
+ .remove = ntb_msit_remove
+ }
+};
+
+static int __init ntb_msit_init(void)
+{
+ int ret;
+
+ if (debugfs_initialized())
+ ntb_msit_dbgfs_topdir = debugfs_create_dir(KBUILD_MODNAME,
+ NULL);
+
+ ret = ntb_register_client(&ntb_msit_client);
+ if (ret)
+ debugfs_remove_recursive(ntb_msit_dbgfs_topdir);
+
+ return ret;
+}
+module_init(ntb_msit_init);
+
+static void __exit ntb_msit_exit(void)
+{
+ ntb_unregister_client(&ntb_msit_client);
+ debugfs_remove_recursive(ntb_msit_dbgfs_topdir);
+}
+module_exit(ntb_msit_exit);
diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c
index 11a6cd374004..d028331558ea 100644
--- a/drivers/ntb/test/ntb_perf.c
+++ b/drivers/ntb/test/ntb_perf.c
@@ -100,7 +100,7 @@ MODULE_DESCRIPTION("PCIe NTB Performance Measurement Tool");
#define DMA_TRIES 100
#define DMA_MDELAY 10
-#define MSG_TRIES 500
+#define MSG_TRIES 1000
#define MSG_UDELAY_LOW 1000
#define MSG_UDELAY_HIGH 2000
@@ -734,8 +734,6 @@ static void perf_disable_service(struct perf_ctx *perf)
{
int pidx;
- ntb_link_disable(perf->ntb);
-
if (perf->cmd_send == perf_msg_cmd_send) {
u64 inbits;
@@ -752,6 +750,16 @@ static void perf_disable_service(struct perf_ctx *perf)
for (pidx = 0; pidx < perf->pcnt; pidx++)
flush_work(&perf->peers[pidx].service);
+
+ for (pidx = 0; pidx < perf->pcnt; pidx++) {
+ struct perf_peer *peer = &perf->peers[pidx];
+
+ ntb_spad_write(perf->ntb, PERF_SPAD_CMD(peer->gidx), 0);
+ }
+
+ ntb_db_clear(perf->ntb, PERF_SPAD_NOTIFY(perf->gidx));
+
+ ntb_link_disable(perf->ntb);
}
/*==============================================================================
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 59a6d232f77a..0884bedcfc7a 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -192,6 +192,9 @@ static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
static void __iomem *pci_msix_desc_addr(struct msi_desc *desc)
{
+ if (desc->msi_attrib.is_virtual)
+ return NULL;
+
return desc->mask_base +
desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
}
@@ -206,14 +209,19 @@ static void __iomem *pci_msix_desc_addr(struct msi_desc *desc)
u32 __pci_msix_desc_mask_irq(struct msi_desc *desc, u32 flag)
{
u32 mask_bits = desc->masked;
+ void __iomem *desc_addr;
if (pci_msi_ignore_mask)
return 0;
+ desc_addr = pci_msix_desc_addr(desc);
+ if (!desc_addr)
+ return 0;
mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
if (flag)
mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
- writel(mask_bits, pci_msix_desc_addr(desc) + PCI_MSIX_ENTRY_VECTOR_CTRL);
+
+ writel(mask_bits, desc_addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
return mask_bits;
}
@@ -273,6 +281,11 @@ void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
if (entry->msi_attrib.is_msix) {
void __iomem *base = pci_msix_desc_addr(entry);
+ if (!base) {
+ WARN_ON(1);
+ return;
+ }
+
msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR);
msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR);
msg->data = readl(base + PCI_MSIX_ENTRY_DATA);
@@ -303,6 +316,9 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
} else if (entry->msi_attrib.is_msix) {
void __iomem *base = pci_msix_desc_addr(entry);
+ if (!base)
+ goto skip;
+
writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR);
writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR);
writel(msg->data, base + PCI_MSIX_ENTRY_DATA);
@@ -327,7 +343,13 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
msg->data);
}
}
+
+skip:
entry->msg = *msg;
+
+ if (entry->write_msi_msg)
+ entry->write_msi_msg(entry, entry->write_msi_msg_data);
+
}
void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg)
@@ -550,6 +572,7 @@ msi_setup_entry(struct pci_dev *dev, int nvec, struct irq_affinity *affd)
entry->msi_attrib.is_msix = 0;
entry->msi_attrib.is_64 = !!(control & PCI_MSI_FLAGS_64BIT);
+ entry->msi_attrib.is_virtual = 0;
entry->msi_attrib.entry_nr = 0;
entry->msi_attrib.maskbit = !!(control & PCI_MSI_FLAGS_MASKBIT);
entry->msi_attrib.default_irq = dev->irq; /* Save IOAPIC IRQ */
@@ -674,6 +697,7 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
struct irq_affinity_desc *curmsk, *masks = NULL;
struct msi_desc *entry;
int ret, i;
+ int vec_count = pci_msix_vec_count(dev);
if (affd)
masks = irq_create_affinity_masks(nvec, affd);
@@ -696,6 +720,10 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
entry->msi_attrib.entry_nr = entries[i].entry;
else
entry->msi_attrib.entry_nr = i;
+
+ entry->msi_attrib.is_virtual =
+ entry->msi_attrib.entry_nr >= vec_count;
+
entry->msi_attrib.default_irq = dev->irq;
entry->mask_base = base;
@@ -714,12 +742,19 @@ static void msix_program_entries(struct pci_dev *dev,
{
struct msi_desc *entry;
int i = 0;
+ void __iomem *desc_addr;
for_each_pci_msi_entry(entry, dev) {
if (entries)
entries[i++].vector = entry->irq;
- entry->masked = readl(pci_msix_desc_addr(entry) +
- PCI_MSIX_ENTRY_VECTOR_CTRL);
+
+ desc_addr = pci_msix_desc_addr(entry);
+ if (desc_addr)
+ entry->masked = readl(desc_addr +
+ PCI_MSIX_ENTRY_VECTOR_CTRL);
+ else
+ entry->masked = 0;
+
msix_mask_irq(entry, 1);
}
}
@@ -932,7 +967,7 @@ int pci_msix_vec_count(struct pci_dev *dev)
EXPORT_SYMBOL(pci_msix_vec_count);
static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
- int nvec, struct irq_affinity *affd)
+ int nvec, struct irq_affinity *affd, int flags)
{
int nr_entries;
int i, j;
@@ -943,7 +978,7 @@ static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
nr_entries = pci_msix_vec_count(dev);
if (nr_entries < 0)
return nr_entries;
- if (nvec > nr_entries)
+ if (nvec > nr_entries && !(flags & PCI_IRQ_VIRTUAL))
return nr_entries;
if (entries) {
@@ -1079,7 +1114,8 @@ EXPORT_SYMBOL(pci_enable_msi);
static int __pci_enable_msix_range(struct pci_dev *dev,
struct msix_entry *entries, int minvec,
- int maxvec, struct irq_affinity *affd)
+ int maxvec, struct irq_affinity *affd,
+ int flags)
{
int rc, nvec = maxvec;
@@ -1096,7 +1132,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev,
return -ENOSPC;
}
- rc = __pci_enable_msix(dev, entries, nvec, affd);
+ rc = __pci_enable_msix(dev, entries, nvec, affd, flags);
if (rc == 0)
return nvec;
@@ -1127,7 +1163,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev,
int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
int minvec, int maxvec)
{
- return __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL);
+ return __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL, 0);
}
EXPORT_SYMBOL(pci_enable_msix_range);
@@ -1167,7 +1203,7 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
if (flags & PCI_IRQ_MSIX) {
msix_vecs = __pci_enable_msix_range(dev, NULL, min_vecs,
- max_vecs, affd);
+ max_vecs, affd, flags);
if (msix_vecs > 0)
return msix_vecs;
}
diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c
index bebbde4ebec0..8c94cd3fd1f2 100644
--- a/drivers/pci/switch/switchtec.c
+++ b/drivers/pci/switch/switchtec.c
@@ -30,6 +30,10 @@ module_param(use_dma_mrpc, bool, 0644);
MODULE_PARM_DESC(use_dma_mrpc,
"Enable the use of the DMA MRPC feature");
+static int nirqs = 32;
+module_param(nirqs, int, 0644);
+MODULE_PARM_DESC(nirqs, "number of interrupts to allocate (more may be useful for NTB applications)");
+
static dev_t switchtec_devt;
static DEFINE_IDA(switchtec_minor_ida);
@@ -1263,8 +1267,12 @@ static int switchtec_init_isr(struct switchtec_dev *stdev)
int dma_mrpc_irq;
int rc;
- nvecs = pci_alloc_irq_vectors(stdev->pdev, 1, 4,
- PCI_IRQ_MSIX | PCI_IRQ_MSI);
+ if (nirqs < 4)
+ nirqs = 4;
+
+ nvecs = pci_alloc_irq_vectors(stdev->pdev, 1, nirqs,
+ PCI_IRQ_MSIX | PCI_IRQ_MSI |
+ PCI_IRQ_VIRTUAL);
if (nvecs < 0)
return nvecs;
diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c
index e8fc28dba8df..96f0d34e9459 100644
--- a/drivers/s390/scsi/zfcp_erp.c
+++ b/drivers/s390/scsi/zfcp_erp.c
@@ -11,6 +11,7 @@
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/kthread.h>
+#include <linux/bug.h>
#include "zfcp_ext.h"
#include "zfcp_reqlist.h"
@@ -217,6 +218,12 @@ static struct zfcp_erp_action *zfcp_erp_setup_act(enum zfcp_erp_act_type need,
struct zfcp_erp_action *erp_action;
struct zfcp_scsi_dev *zfcp_sdev;
+ if (WARN_ON_ONCE(need != ZFCP_ERP_ACTION_REOPEN_LUN &&
+ need != ZFCP_ERP_ACTION_REOPEN_PORT &&
+ need != ZFCP_ERP_ACTION_REOPEN_PORT_FORCED &&
+ need != ZFCP_ERP_ACTION_REOPEN_ADAPTER))
+ return NULL;
+
switch (need) {
case ZFCP_ERP_ACTION_REOPEN_LUN:
zfcp_sdev = sdev_to_zfcp(sdev);
diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c
index d94496ee6883..296bbc3c4606 100644
--- a/drivers/s390/scsi/zfcp_fsf.c
+++ b/drivers/s390/scsi/zfcp_fsf.c
@@ -11,6 +11,7 @@
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/blktrace_api.h>
+#include <linux/types.h>
#include <linux/slab.h>
#include <scsi/fc/fc_els.h>
#include "zfcp_ext.h"
@@ -741,6 +742,7 @@ static struct zfcp_fsf_req *zfcp_fsf_req_create(struct zfcp_qdio *qdio,
static int zfcp_fsf_req_send(struct zfcp_fsf_req *req)
{
+ const bool is_srb = zfcp_fsf_req_is_status_read_buffer(req);
struct zfcp_adapter *adapter = req->adapter;
struct zfcp_qdio *qdio = adapter->qdio;
int req_id = req->req_id;
@@ -757,8 +759,20 @@ static int zfcp_fsf_req_send(struct zfcp_fsf_req *req)
return -EIO;
}
+ /*
+ * NOTE: DO NOT TOUCH ASYNC req PAST THIS POINT.
+ * ONLY TOUCH SYNC req AGAIN ON req->completion.
+ *
+ * The request might complete and be freed concurrently at any point
+ * now. This is not protected by the QDIO-lock (req_q_lock). So any
+ * uncontrolled access after this might result in an use-after-free bug.
+ * Only if the request doesn't have ZFCP_STATUS_FSFREQ_CLEANUP set, and
+ * when it is completed via req->completion, is it safe to use req
+ * again.
+ */
+
/* Don't increase for unsolicited status */
- if (!zfcp_fsf_req_is_status_read_buffer(req))
+ if (!is_srb)
adapter->fsf_req_seq_no++;
adapter->req_no++;
@@ -805,6 +819,7 @@ int zfcp_fsf_status_read(struct zfcp_qdio *qdio)
retval = zfcp_fsf_req_send(req);
if (retval)
goto failed_req_send;
+ /* NOTE: DO NOT TOUCH req PAST THIS POINT! */
goto out;
@@ -914,8 +929,10 @@ struct zfcp_fsf_req *zfcp_fsf_abort_fcp_cmnd(struct scsi_cmnd *scmnd)
req->qtcb->bottom.support.req_handle = (u64) old_req_id;
zfcp_fsf_start_timer(req, ZFCP_FSF_SCSI_ER_TIMEOUT);
- if (!zfcp_fsf_req_send(req))
+ if (!zfcp_fsf_req_send(req)) {
+ /* NOTE: DO NOT TOUCH req, UNTIL IT COMPLETES! */
goto out;
+ }
out_error_free:
zfcp_fsf_req_free(req);
@@ -1098,6 +1115,7 @@ int zfcp_fsf_send_ct(struct zfcp_fc_wka_port *wka_port,
ret = zfcp_fsf_req_send(req);
if (ret)
goto failed_send;
+ /* NOTE: DO NOT TOUCH req PAST THIS POINT! */
goto out;
@@ -1198,6 +1216,7 @@ int zfcp_fsf_send_els(struct zfcp_adapter *adapter, u32 d_id,
ret = zfcp_fsf_req_send(req);
if (ret)
goto failed_send;
+ /* NOTE: DO NOT TOUCH req PAST THIS POINT! */
goto out;
@@ -1243,6 +1262,7 @@ int zfcp_fsf_exchange_config_data(struct zfcp_erp_action *erp_action)
zfcp_fsf_req_free(req);
erp_action->fsf_req_id = 0;
}
+ /* NOTE: DO NOT TOUCH req PAST THIS POINT! */
out:
spin_unlock_irq(&qdio->req_q_lock);
return retval;
@@ -1279,8 +1299,10 @@ int zfcp_fsf_exchange_config_data_sync(struct zfcp_qdio *qdio,
zfcp_fsf_start_timer(req, ZFCP_FSF_REQUEST_TIMEOUT);
retval = zfcp_fsf_req_send(req);
spin_unlock_irq(&qdio->req_q_lock);
- if (!retval)
+ if (!retval) {
+ /* NOTE: ONLY TOUCH SYNC req AGAIN ON req->completion. */
wait_for_completion(&req->completion);
+ }
zfcp_fsf_req_free(req);
return retval;
@@ -1330,6 +1352,7 @@ int zfcp_fsf_exchange_port_data(struct zfcp_erp_action *erp_action)
zfcp_fsf_req_free(req);
erp_action->fsf_req_id = 0;
}
+ /* NOTE: DO NOT TOUCH req PAST THIS POINT! */
out:
spin_unlock_irq(&qdio->req_q_lock);
return retval;
@@ -1372,8 +1395,10 @@ int zfcp_fsf_exchange_port_data_sync(struct zfcp_qdio *qdio,
retval = zfcp_fsf_req_send(req);
spin_unlock_irq(&qdio->req_q_lock);
- if (!retval)
+ if (!retval) {
+ /* NOTE: ONLY TOUCH SYNC req AGAIN ON req->completion. */
wait_for_completion(&req->completion);
+ }
zfcp_fsf_req_free(req);
@@ -1493,6 +1518,7 @@ int zfcp_fsf_open_port(struct zfcp_erp_action *erp_action)
erp_action->fsf_req_id = 0;
put_device(&port->dev);
}
+ /* NOTE: DO NOT TOUCH req PAST THIS POINT! */
out:
spin_unlock_irq(&qdio->req_q_lock);
return retval;
@@ -1557,6 +1583,7 @@ int zfcp_fsf_close_port(struct zfcp_erp_action *erp_action)
zfcp_fsf_req_free(req);
erp_action->fsf_req_id = 0;
}
+ /* NOTE: DO NOT TOUCH req PAST THIS POINT! */
out:
spin_unlock_irq(&qdio->req_q_lock);
return retval;
@@ -1600,6 +1627,7 @@ int zfcp_fsf_open_wka_port(struct zfcp_fc_wka_port *wka_port)
{
struct zfcp_qdio *qdio = wka_port->adapter->qdio;
struct zfcp_fsf_req *req;
+ unsigned long req_id = 0;
int retval = -EIO;
spin_lock_irq(&qdio->req_q_lock);
@@ -1622,14 +1650,17 @@ int zfcp_fsf_open_wka_port(struct zfcp_fc_wka_port *wka_port)
hton24(req->qtcb->bottom.support.d_id, wka_port->d_id);
req->data = wka_port;
+ req_id = req->req_id;
+
zfcp_fsf_start_timer(req, ZFCP_FSF_REQUEST_TIMEOUT);
retval = zfcp_fsf_req_send(req);
if (retval)
zfcp_fsf_req_free(req);
+ /* NOTE: DO NOT TOUCH req PAST THIS POINT! */
out:
spin_unlock_irq(&qdio->req_q_lock);
if (!retval)
- zfcp_dbf_rec_run_wka("fsowp_1", wka_port, req->req_id);
+ zfcp_dbf_rec_run_wka("fsowp_1", wka_port, req_id);
return retval;
}
@@ -1655,6 +1686,7 @@ int zfcp_fsf_close_wka_port(struct zfcp_fc_wka_port *wka_port)
{
struct zfcp_qdio *qdio = wka_port->adapter->qdio;
struct zfcp_fsf_req *req;
+ unsigned long req_id = 0;
int retval = -EIO;
spin_lock_irq(&qdio->req_q_lock);
@@ -1677,14 +1709,17 @@ int zfcp_fsf_close_wka_port(struct zfcp_fc_wka_port *wka_port)
req->data = wka_port;
req->qtcb->header.port_handle = wka_port->handle;
+ req_id = req->req_id;
+
zfcp_fsf_start_timer(req, ZFCP_FSF_REQUEST_TIMEOUT);
retval = zfcp_fsf_req_send(req);
if (retval)
zfcp_fsf_req_free(req);
+ /* NOTE: DO NOT TOUCH req PAST THIS POINT! */
out:
spin_unlock_irq(&qdio->req_q_lock);
if (!retval)
- zfcp_dbf_rec_run_wka("fscwp_1", wka_port, req->req_id);
+ zfcp_dbf_rec_run_wka("fscwp_1", wka_port, req_id);
return retval;
}
@@ -1776,6 +1811,7 @@ int zfcp_fsf_close_physical_port(struct zfcp_erp_action *erp_action)
zfcp_fsf_req_free(req);
erp_action->fsf_req_id = 0;
}
+ /* NOTE: DO NOT TOUCH req PAST THIS POINT! */
out:
spin_unlock_irq(&qdio->req_q_lock);
return retval;
@@ -1899,6 +1935,7 @@ int zfcp_fsf_open_lun(struct zfcp_erp_action *erp_action)
zfcp_fsf_req_free(req);
erp_action->fsf_req_id = 0;
}
+ /* NOTE: DO NOT TOUCH req PAST THIS POINT! */
out:
spin_unlock_irq(&qdio->req_q_lock);
return retval;
@@ -1987,6 +2024,7 @@ int zfcp_fsf_close_lun(struct zfcp_erp_action *erp_action)
zfcp_fsf_req_free(req);
erp_action->fsf_req_id = 0;
}
+ /* NOTE: DO NOT TOUCH req PAST THIS POINT! */
out:
spin_unlock_irq(&qdio->req_q_lock);
return retval;
@@ -2299,6 +2337,7 @@ int zfcp_fsf_fcp_cmnd(struct scsi_cmnd *scsi_cmnd)
retval = zfcp_fsf_req_send(req);
if (unlikely(retval))
goto failed_scsi_cmnd;
+ /* NOTE: DO NOT TOUCH req PAST THIS POINT! */
goto out;
@@ -2373,8 +2412,10 @@ struct zfcp_fsf_req *zfcp_fsf_fcp_task_mgmt(struct scsi_device *sdev,
zfcp_fc_fcp_tm(fcp_cmnd, sdev, tm_flags);
zfcp_fsf_start_timer(req, ZFCP_FSF_SCSI_ER_TIMEOUT);
- if (!zfcp_fsf_req_send(req))
+ if (!zfcp_fsf_req_send(req)) {
+ /* NOTE: DO NOT TOUCH req, UNTIL IT COMPLETES! */
goto out;
+ }
zfcp_fsf_req_free(req);
req = NULL;
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index aeda53901064..c00e3dd57990 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -185,7 +185,7 @@ zalon7xx-objs := zalon.o ncr53c8xx.o
# Files generated that shall be removed upon make clean
clean-files := 53c700_d.h 53c700_u.h scsi_devinfo_tbl.c
-$(obj)/53c700.o $(MODVERDIR)/$(obj)/53c700.ver: $(obj)/53c700_d.h
+$(obj)/53c700.o: $(obj)/53c700_d.h
$(obj)/scsi_sysfs.o: $(obj)/scsi_devinfo_tbl.c
diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index ff0d8c6a8d0c..55522b7162d3 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -462,6 +462,9 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize)
else
shost->dma_boundary = 0xffffffff;
+ if (sht->virt_boundary_mask)
+ shost->virt_boundary_mask = sht->virt_boundary_mask;
+
device_initialize(&shost->shost_gendev);
dev_set_name(&shost->shost_gendev, "host%d", shost->host_no);
shost->shost_gendev.bus = &scsi_bus_type;
diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c
index 8e1053bdd843..52e866659853 100644
--- a/drivers/scsi/libfc/fc_exch.c
+++ b/drivers/scsi/libfc/fc_exch.c
@@ -2591,7 +2591,7 @@ void fc_exch_recv(struct fc_lport *lport, struct fc_frame *fp)
/* lport lock ? */
if (!lport || lport->state == LPORT_ST_DISABLED) {
- FC_LPORT_DBG(lport, "Receiving frames for an lport that "
+ FC_LIBFC_DBG("Receiving frames for an lport that "
"has not been initialized correctly\n");
fc_frame_free(fp);
return;
diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
index 4f339f939a51..bec83eb8ab87 100644
--- a/drivers/scsi/libsas/sas_scsi_host.c
+++ b/drivers/scsi/libsas/sas_scsi_host.c
@@ -414,7 +414,6 @@ static void sas_wait_eh(struct domain_device *dev)
goto retry;
}
}
-EXPORT_SYMBOL(sas_wait_eh);
static int sas_queue_reset(struct domain_device *dev, int reset_type,
u64 lun, int wait)
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.h b/drivers/scsi/lpfc/lpfc_debugfs.h
index 2322ddb085c0..34070874616d 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.h
+++ b/drivers/scsi/lpfc/lpfc_debugfs.h
@@ -330,7 +330,7 @@ enum {
* This function dumps an entry indexed by @idx from a queue specified by the
* queue descriptor @q.
**/
-static inline void
+static void
lpfc_debug_dump_qe(struct lpfc_queue *q, uint32_t idx)
{
char line_buf[LPFC_LBUF_SZ];
diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h
index ca724fe91b8d..a14e8344822b 100644
--- a/drivers/scsi/megaraid/megaraid_sas.h
+++ b/drivers/scsi/megaraid/megaraid_sas.h
@@ -21,8 +21,8 @@
/*
* MegaRAID SAS Driver meta data
*/
-#define MEGASAS_VERSION "07.710.06.00-rc1"
-#define MEGASAS_RELDATE "June 18, 2019"
+#define MEGASAS_VERSION "07.710.50.00-rc1"
+#define MEGASAS_RELDATE "June 28, 2019"
/*
* Device IDs
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index 80ab9700f1de..b2339d04a700 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -105,6 +105,10 @@ MODULE_PARM_DESC(perf_mode, "Performance mode (only for Aero adapters), options:
"default mode is 'balanced'"
);
+int event_log_level = MFI_EVT_CLASS_CRITICAL;
+module_param(event_log_level, int, 0644);
+MODULE_PARM_DESC(event_log_level, "Asynchronous event logging level- range is: -2(CLASS_DEBUG) to 4(CLASS_DEAD), Default: 2(CLASS_CRITICAL)");
+
MODULE_LICENSE("GPL");
MODULE_VERSION(MEGASAS_VERSION);
MODULE_AUTHOR("megaraidlinux.pdl@broadcom.com");
@@ -280,7 +284,7 @@ void megasas_set_dma_settings(struct megasas_instance *instance,
}
}
-void
+static void
megasas_issue_dcmd(struct megasas_instance *instance, struct megasas_cmd *cmd)
{
instance->instancet->fire_cmd(instance,
@@ -404,7 +408,13 @@ megasas_decode_evt(struct megasas_instance *instance)
union megasas_evt_class_locale class_locale;
class_locale.word = le32_to_cpu(evt_detail->cl.word);
- if (class_locale.members.class >= MFI_EVT_CLASS_CRITICAL)
+ if ((event_log_level < MFI_EVT_CLASS_DEBUG) ||
+ (event_log_level > MFI_EVT_CLASS_DEAD)) {
+ printk(KERN_WARNING "megaraid_sas: provided event log level is out of range, setting it to default 2(CLASS_CRITICAL), permissible range is: -2 to 4\n");
+ event_log_level = MFI_EVT_CLASS_CRITICAL;
+ }
+
+ if (class_locale.members.class >= event_log_level)
dev_info(&instance->pdev->dev, "%d (%s/0x%04x/%s) - %s\n",
le32_to_cpu(evt_detail->seq_num),
format_timestamp(le32_to_cpu(evt_detail->time_stamp)),
@@ -2237,7 +2247,7 @@ megasas_internal_reset_defer_cmds(struct megasas_instance *instance);
static void
process_fw_state_change_wq(struct work_struct *work);
-void megasas_do_ocr(struct megasas_instance *instance)
+static void megasas_do_ocr(struct megasas_instance *instance)
{
if ((instance->pdev->device == PCI_DEVICE_ID_LSI_SAS1064R) ||
(instance->pdev->device == PCI_DEVICE_ID_DELL_PERC5) ||
@@ -3303,7 +3313,7 @@ static DEVICE_ATTR_RO(fw_cmds_outstanding);
static DEVICE_ATTR_RO(dump_system_regs);
static DEVICE_ATTR_RO(raid_map_id);
-struct device_attribute *megaraid_host_attrs[] = {
+static struct device_attribute *megaraid_host_attrs[] = {
&dev_attr_fw_crash_buffer_size,
&dev_attr_fw_crash_buffer,
&dev_attr_fw_crash_state,
@@ -3334,6 +3344,7 @@ static struct scsi_host_template megasas_template = {
.shost_attrs = megaraid_host_attrs,
.bios_param = megasas_bios_param,
.change_queue_depth = scsi_change_queue_depth,
+ .max_segment_size = 0xffffffff,
.no_write_same = 1,
};
@@ -5933,7 +5944,8 @@ static int megasas_init_fw(struct megasas_instance *instance)
instance->is_rdpq = (scratch_pad_1 & MR_RDPQ_MODE_OFFSET) ?
1 : 0;
- if (!instance->msix_combined) {
+ if (instance->adapter_type >= INVADER_SERIES &&
+ !instance->msix_combined) {
instance->msix_load_balance = true;
instance->smp_affinity_enable = false;
}
@@ -6546,7 +6558,8 @@ megasas_get_target_prop(struct megasas_instance *instance,
int ret;
struct megasas_cmd *cmd;
struct megasas_dcmd_frame *dcmd;
- u16 targetId = (sdev->channel % 2) + sdev->id;
+ u16 targetId = ((sdev->channel % 2) * MEGASAS_MAX_DEV_PER_CHANNEL) +
+ sdev->id;
cmd = megasas_get_cmd(instance);
@@ -8748,6 +8761,12 @@ static int __init megasas_init(void)
goto err_pcidrv;
}
+ if ((event_log_level < MFI_EVT_CLASS_DEBUG) ||
+ (event_log_level > MFI_EVT_CLASS_DEAD)) {
+ printk(KERN_WARNING "megarid_sas: provided event log level is out of range, setting it to default 2(CLASS_CRITICAL), permissible range is: -2 to 4\n");
+ event_log_level = MFI_EVT_CLASS_CRITICAL;
+ }
+
rval = driver_create_file(&megasas_pci_driver.driver,
&driver_attr_version);
if (rval)
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index 27c731a3fb49..717ba0845a2a 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -10238,6 +10238,7 @@ static struct scsi_host_template mpt3sas_driver_template = {
.this_id = -1,
.sg_tablesize = MPT3SAS_SG_DEPTH,
.max_sectors = 32767,
+ .max_segment_size = 0xffffffff,
.cmd_per_lun = 7,
.shost_attrs = mpt3sas_host_attrs,
.sdev_attrs = mpt3sas_dev_attrs,
diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c
index dd38c356a1a4..9453705f643a 100644
--- a/drivers/scsi/pm8001/pm8001_sas.c
+++ b/drivers/scsi/pm8001/pm8001_sas.c
@@ -888,6 +888,8 @@ static void pm8001_dev_gone_notify(struct domain_device *dev)
spin_unlock_irqrestore(&pm8001_ha->lock, flags);
pm8001_exec_internal_task_abort(pm8001_ha, pm8001_dev ,
dev, 1, 0);
+ while (pm8001_dev->running_req)
+ msleep(20);
spin_lock_irqsave(&pm8001_ha->lock, flags);
}
PM8001_CHIP_DISP->dereg_dev_req(pm8001_ha, device_id);
@@ -1256,8 +1258,10 @@ int pm8001_abort_task(struct sas_task *task)
PM8001_MSG_DBG(pm8001_ha,
pm8001_printk("Waiting for Port reset\n"));
wait_for_completion(&completion_reset);
- if (phy->port_reset_status)
+ if (phy->port_reset_status) {
+ pm8001_dev_gone_notify(dev);
goto out;
+ }
/*
* 4. SATA Abort ALL
diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c
index 1128d86d241a..73261902d75d 100644
--- a/drivers/scsi/pm8001/pm80xx_hwi.c
+++ b/drivers/scsi/pm8001/pm80xx_hwi.c
@@ -604,7 +604,7 @@ static void update_main_config_table(struct pm8001_hba_info *pm8001_ha)
pm8001_ha->main_cfg_tbl.pm80xx_tbl.port_recovery_timer &=
0x0000ffff;
pm8001_ha->main_cfg_tbl.pm80xx_tbl.port_recovery_timer |=
- 0x140000;
+ CHIP_8006_PORT_RECOVERY_TIMEOUT;
}
pm8001_mw32(address, MAIN_PORT_RECOVERY_TIMER,
pm8001_ha->main_cfg_tbl.pm80xx_tbl.port_recovery_timer);
diff --git a/drivers/scsi/pm8001/pm80xx_hwi.h b/drivers/scsi/pm8001/pm80xx_hwi.h
index 84d7426441bf..dc9ab7689060 100644
--- a/drivers/scsi/pm8001/pm80xx_hwi.h
+++ b/drivers/scsi/pm8001/pm80xx_hwi.h
@@ -230,6 +230,8 @@
#define SAS_MAX_AIP 0x200000
#define IT_NEXUS_TIMEOUT 0x7D0
#define PORT_RECOVERY_TIMEOUT ((IT_NEXUS_TIMEOUT/100) + 30)
+/* Port recovery timeout, 10000 ms for PM8006 controller */
+#define CHIP_8006_PORT_RECOVERY_TIMEOUT 0x640000
#ifdef __LITTLE_ENDIAN_BITFIELD
struct sas_identify_frame_local {
diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c
index a08ff3bd6310..df14597752ec 100644
--- a/drivers/scsi/scsi_devinfo.c
+++ b/drivers/scsi/scsi_devinfo.c
@@ -239,6 +239,8 @@ static struct {
{"LSI", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
{"ENGENIO", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
{"LENOVO", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
+ {"SanDisk", "Cruzer Blade", NULL, BLIST_TRY_VPD_PAGES |
+ BLIST_INQUIRY_36},
{"SMSC", "USB 2 HS-CF", NULL, BLIST_SPARSELUN | BLIST_INQUIRY_36},
{"SONY", "CD-ROM CDU-8001", NULL, BLIST_BORKEN},
{"SONY", "TSL", NULL, BLIST_FORCELUN}, /* DDS3 & DDS4 autoloaders */
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index e1da8c70a266..9381171c2fc0 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -84,11 +84,11 @@ int scsi_init_sense_cache(struct Scsi_Host *shost)
struct kmem_cache *cache;
int ret = 0;
+ mutex_lock(&scsi_sense_cache_mutex);
cache = scsi_select_sense_cache(shost->unchecked_isa_dma);
if (cache)
- return 0;
+ goto exit;
- mutex_lock(&scsi_sense_cache_mutex);
if (shost->unchecked_isa_dma) {
scsi_sense_isadma_cache =
kmem_cache_create("scsi_sense_cache(DMA)",
@@ -104,7 +104,7 @@ int scsi_init_sense_cache(struct Scsi_Host *shost)
if (!scsi_sense_cache)
ret = -ENOMEM;
}
-
+ exit:
mutex_unlock(&scsi_sense_cache_mutex);
return ret;
}
@@ -1452,7 +1452,7 @@ static void scsi_softirq_done(struct request *rq)
disposition = scsi_decide_disposition(cmd);
if (disposition != SUCCESS &&
time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) {
- sdev_printk(KERN_ERR, cmd->device,
+ scmd_printk(KERN_ERR, cmd,
"timing out command, waited %lus\n",
wait_for/HZ);
disposition = SUCCESS;
@@ -1784,6 +1784,8 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
blk_queue_max_integrity_segments(q, shost->sg_prot_tablesize);
}
+ shost->max_sectors = min_t(unsigned int, shost->max_sectors,
+ dma_max_mapping_size(dev) << SECTOR_SHIFT);
blk_queue_max_hw_sectors(q, shost->max_sectors);
if (shost->unchecked_isa_dma)
blk_queue_bounce_limit(q, BLK_BOUNCE_ISA);
@@ -1791,7 +1793,8 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
dma_set_seg_boundary(dev, shost->dma_boundary);
blk_queue_max_segment_size(q, shost->max_segment_size);
- dma_set_max_seg_size(dev, shost->max_segment_size);
+ blk_queue_virt_boundary(q, shost->virt_boundary_mask);
+ dma_set_max_seg_size(dev, queue_max_segment_size(q));
/*
* Set a reasonable default alignment: The larger of 32-byte (dword),
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index db16c19e05c4..5d6ff3931632 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -461,7 +461,7 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
{
struct gendisk *disk = sdkp->disk;
unsigned int nr_zones;
- u32 zone_blocks;
+ u32 zone_blocks = 0;
int ret;
if (!sd_is_zoned(sdkp))
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index c2b6a0ca6933..ed8b9ac805e6 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -1423,9 +1423,6 @@ static int storvsc_device_configure(struct scsi_device *sdevice)
{
blk_queue_rq_timeout(sdevice->request_queue, (storvsc_timeout * HZ));
- /* Ensure there are no gaps in presented sgls */
- blk_queue_virt_boundary(sdevice->request_queue, PAGE_SIZE - 1);
-
sdevice->no_write_same = 1;
/*
@@ -1698,6 +1695,8 @@ static struct scsi_host_template scsi_driver = {
.this_id = -1,
/* Make sure we dont get a sg segment crosses a page boundary */
.dma_boundary = PAGE_SIZE-1,
+ /* Ensure there are no gaps in presented sgls */
+ .virt_boundary_mask = PAGE_SIZE-1,
.no_write_same = 1,
.track_queue_depth = 1,
.change_queue_depth = storvsc_change_queue_depth,
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 04d3686511c8..e274053109d0 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -4587,8 +4587,6 @@ static int ufshcd_slave_configure(struct scsi_device *sdev)
struct request_queue *q = sdev->request_queue;
blk_queue_update_dma_pad(q, PRDT_DATA_BYTE_COUNT_PAD - 1);
- blk_queue_max_segment_size(q, PRDT_DATA_BYTE_COUNT_MAX);
-
return 0;
}
@@ -7022,6 +7020,7 @@ static struct scsi_host_template ufshcd_driver_template = {
.sg_tablesize = SG_ALL,
.cmd_per_lun = UFSHCD_CMD_PER_LUN,
.can_queue = UFSHCD_CAN_QUEUE,
+ .max_segment_size = PRDT_DATA_BYTE_COUNT_MAX,
.max_host_blocked = 1,
.track_queue_depth = 1,
.sdev_groups = ufshcd_driver_groups,
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index aab29f48c62d..4ca0b8ff9a72 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1267,7 +1267,7 @@ __dentry_leases_walk(struct ceph_mds_client *mdsc,
if (!spin_trylock(&dentry->d_lock))
continue;
- if (dentry->d_lockref.count < 0) {
+ if (__lockref_is_dead(&dentry->d_lockref)) {
list_del_init(&di->lease_list);
goto next;
}
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 270d3c58fb3b..3289b566463f 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -1104,6 +1104,10 @@ ssize_t cifs_file_copychunk_range(unsigned int xid,
goto out;
}
+ rc = -EOPNOTSUPP;
+ if (!target_tcon->ses->server->ops->copychunk_range)
+ goto out;
+
/*
* Note: cifs case is easier than btrfs since server responsible for
* checks for proper open modes and file type and if it wants
@@ -1115,11 +1119,12 @@ ssize_t cifs_file_copychunk_range(unsigned int xid,
/* should we flush first and last page first */
truncate_inode_pages(&target_inode->i_data, 0);
- if (target_tcon->ses->server->ops->copychunk_range)
+ rc = file_modified(dst_file);
+ if (!rc)
rc = target_tcon->ses->server->ops->copychunk_range(xid,
smb_file_src, smb_file_target, off, len, destoff);
- else
- rc = -EOPNOTSUPP;
+
+ file_accessed(src_file);
/* force revalidate of size and timestamps of target file now
* that target is updated on the server
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index aea005703785..4b21a90015a9 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -152,5 +152,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
-#define CIFS_VERSION "2.20"
+#define CIFS_VERSION "2.21"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 1bffe029fb66..56ca4b8ccaba 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -2406,6 +2406,8 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
struct inode *inode = d_inode(direntry);
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct cifsInodeInfo *cifsInode = CIFS_I(inode);
+ struct cifsFileInfo *wfile;
+ struct cifs_tcon *tcon;
char *full_path = NULL;
int rc = -EACCES;
__u32 dosattr = 0;
@@ -2452,6 +2454,20 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
mapping_set_error(inode->i_mapping, rc);
rc = 0;
+ if (attrs->ia_valid & ATTR_MTIME) {
+ rc = cifs_get_writable_file(cifsInode, false, &wfile);
+ if (!rc) {
+ tcon = tlink_tcon(wfile->tlink);
+ rc = tcon->ses->server->ops->flush(xid, tcon, &wfile->fid);
+ cifsFileInfo_put(wfile);
+ if (rc)
+ return rc;
+ } else if (rc != -EBADF)
+ return rc;
+ else
+ rc = 0;
+ }
+
if (attrs->ia_valid & ATTR_SIZE) {
rc = cifs_set_file_size(inode, attrs, xid, full_path);
if (rc != 0)
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index 54bffb2a1786..e6a1fc72018f 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -88,14 +88,20 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms,
}
if (buf) {
- /* open response does not have IndexNumber field - get it */
- rc = SMB2_get_srv_num(xid, oparms->tcon, fid->persistent_fid,
+ /* if open response does not have IndexNumber field - get it */
+ if (smb2_data->IndexNumber == 0) {
+ rc = SMB2_get_srv_num(xid, oparms->tcon,
+ fid->persistent_fid,
fid->volatile_fid,
&smb2_data->IndexNumber);
- if (rc) {
- /* let get_inode_info disable server inode numbers */
- smb2_data->IndexNumber = 0;
- rc = 0;
+ if (rc) {
+ /*
+ * let get_inode_info disable server inode
+ * numbers
+ */
+ smb2_data->IndexNumber = 0;
+ rc = 0;
+ }
}
move_smb2_info_to_cifs(buf, smb2_data);
}
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 0cdc4e47ca87..a5bc1b671c12 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -694,8 +694,51 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid)
smb2_set_related(&rqst[1]);
+ /*
+ * We do not hold the lock for the open because in case
+ * SMB2_open needs to reconnect, it will end up calling
+ * cifs_mark_open_files_invalid() which takes the lock again
+ * thus causing a deadlock
+ */
+
+ mutex_unlock(&tcon->crfid.fid_mutex);
rc = compound_send_recv(xid, ses, flags, 2, rqst,
resp_buftype, rsp_iov);
+ mutex_lock(&tcon->crfid.fid_mutex);
+
+ /*
+ * Now we need to check again as the cached root might have
+ * been successfully re-opened from a concurrent process
+ */
+
+ if (tcon->crfid.is_valid) {
+ /* work was already done */
+
+ /* stash fids for close() later */
+ struct cifs_fid fid = {
+ .persistent_fid = pfid->persistent_fid,
+ .volatile_fid = pfid->volatile_fid,
+ };
+
+ /*
+ * caller expects this func to set pfid to a valid
+ * cached root, so we copy the existing one and get a
+ * reference.
+ */
+ memcpy(pfid, tcon->crfid.fid, sizeof(*pfid));
+ kref_get(&tcon->crfid.refcount);
+
+ mutex_unlock(&tcon->crfid.fid_mutex);
+
+ if (rc == 0) {
+ /* close extra handle outside of crit sec */
+ SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
+ }
+ goto oshr_free;
+ }
+
+ /* Cached root is still invalid, continue normaly */
+
if (rc)
goto oshr_exit;
@@ -711,11 +754,12 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid)
tcon->crfid.is_valid = true;
kref_init(&tcon->crfid.refcount);
+ /* BB TBD check to see if oplock level check can be removed below */
if (o_rsp->OplockLevel == SMB2_OPLOCK_LEVEL_LEASE) {
kref_get(&tcon->crfid.refcount);
- oplock = smb2_parse_lease_state(server, o_rsp,
- &oparms.fid->epoch,
- oparms.fid->lease_key);
+ smb2_parse_contexts(server, o_rsp,
+ &oparms.fid->epoch,
+ oparms.fid->lease_key, &oplock, NULL);
} else
goto oshr_exit;
@@ -729,8 +773,9 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid)
(char *)&tcon->crfid.file_all_info))
tcon->crfid.file_all_info_is_valid = 1;
- oshr_exit:
+oshr_exit:
mutex_unlock(&tcon->crfid.fid_mutex);
+oshr_free:
SMB2_open_free(&rqst[0]);
SMB2_query_info_free(&rqst[1]);
free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base);
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index f58e4dc3987b..c8cd7b6cdda2 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1873,10 +1873,21 @@ create_reconnect_durable_buf(struct cifs_fid *fid)
return buf;
}
-__u8
-smb2_parse_lease_state(struct TCP_Server_Info *server,
+static void
+parse_query_id_ctxt(struct create_context *cc, struct smb2_file_all_info *buf)
+{
+ struct create_on_disk_id *pdisk_id = (struct create_on_disk_id *)cc;
+
+ cifs_dbg(FYI, "parse query id context 0x%llx 0x%llx\n",
+ pdisk_id->DiskFileId, pdisk_id->VolumeId);
+ buf->IndexNumber = pdisk_id->DiskFileId;
+}
+
+void
+smb2_parse_contexts(struct TCP_Server_Info *server,
struct smb2_create_rsp *rsp,
- unsigned int *epoch, char *lease_key)
+ unsigned int *epoch, char *lease_key, __u8 *oplock,
+ struct smb2_file_all_info *buf)
{
char *data_offset;
struct create_context *cc;
@@ -1884,15 +1895,24 @@ smb2_parse_lease_state(struct TCP_Server_Info *server,
unsigned int remaining;
char *name;
+ *oplock = 0;
data_offset = (char *)rsp + le32_to_cpu(rsp->CreateContextsOffset);
remaining = le32_to_cpu(rsp->CreateContextsLength);
cc = (struct create_context *)data_offset;
+
+ /* Initialize inode number to 0 in case no valid data in qfid context */
+ if (buf)
+ buf->IndexNumber = 0;
+
while (remaining >= sizeof(struct create_context)) {
name = le16_to_cpu(cc->NameOffset) + (char *)cc;
if (le16_to_cpu(cc->NameLength) == 4 &&
- strncmp(name, "RqLs", 4) == 0)
- return server->ops->parse_lease_buf(cc, epoch,
- lease_key);
+ strncmp(name, SMB2_CREATE_REQUEST_LEASE, 4) == 0)
+ *oplock = server->ops->parse_lease_buf(cc, epoch,
+ lease_key);
+ else if (buf && (le16_to_cpu(cc->NameLength) == 4) &&
+ strncmp(name, SMB2_CREATE_QUERY_ON_DISK_ID, 4) == 0)
+ parse_query_id_ctxt(cc, buf);
next = le32_to_cpu(cc->Next);
if (!next)
@@ -1901,7 +1921,10 @@ smb2_parse_lease_state(struct TCP_Server_Info *server,
cc = (struct create_context *)((char *)cc + next);
}
- return 0;
+ if (rsp->OplockLevel != SMB2_OPLOCK_LEVEL_LEASE)
+ *oplock = rsp->OplockLevel;
+
+ return;
}
static int
@@ -2588,12 +2611,9 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
buf->DeletePending = 0;
}
- if (rsp->OplockLevel == SMB2_OPLOCK_LEVEL_LEASE)
- *oplock = smb2_parse_lease_state(server, rsp,
- &oparms->fid->epoch,
- oparms->fid->lease_key);
- else
- *oplock = rsp->OplockLevel;
+
+ smb2_parse_contexts(server, rsp, &oparms->fid->epoch,
+ oparms->fid->lease_key, oplock, buf);
creat_exit:
SMB2_open_free(&rqst);
free_rsp_buf(resp_buftype, rsp);
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 7e2e782f8edd..747de9317659 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -818,7 +818,9 @@ struct durable_reconnect_context_v2 {
} __packed;
/* See MS-SMB2 2.2.14.2.9 */
-struct on_disk_id {
+struct create_on_disk_id {
+ struct create_context ccontext;
+ __u8 Name[8];
__le64 DiskFileId;
__le64 VolumeId;
__u32 Reserved[4];
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 52df125e9189..07ca72486cfa 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -228,9 +228,10 @@ extern int smb3_validate_negotiate(const unsigned int, struct cifs_tcon *);
extern enum securityEnum smb2_select_sectype(struct TCP_Server_Info *,
enum securityEnum);
-extern __u8 smb2_parse_lease_state(struct TCP_Server_Info *server,
- struct smb2_create_rsp *rsp,
- unsigned int *epoch, char *lease_key);
+extern void smb2_parse_contexts(struct TCP_Server_Info *server,
+ struct smb2_create_rsp *rsp,
+ unsigned int *epoch, char *lease_key,
+ __u8 *oplock, struct smb2_file_all_info *buf);
extern int smb3_encryption_required(const struct cifs_tcon *tcon);
extern int smb2_validate_iov(unsigned int offset, unsigned int buffer_length,
struct kvec *iov, unsigned int min_buf_size);
diff --git a/fs/dcache.c b/fs/dcache.c
index f41121e5d1ec..e88cf0554e65 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -861,6 +861,32 @@ void dput(struct dentry *dentry)
}
EXPORT_SYMBOL(dput);
+static void __dput_to_list(struct dentry *dentry, struct list_head *list)
+__must_hold(&dentry->d_lock)
+{
+ if (dentry->d_flags & DCACHE_SHRINK_LIST) {
+ /* let the owner of the list it's on deal with it */
+ --dentry->d_lockref.count;
+ } else {
+ if (dentry->d_flags & DCACHE_LRU_LIST)
+ d_lru_del(dentry);
+ if (!--dentry->d_lockref.count)
+ d_shrink_add(dentry, list);
+ }
+}
+
+void dput_to_list(struct dentry *dentry, struct list_head *list)
+{
+ rcu_read_lock();
+ if (likely(fast_dput(dentry))) {
+ rcu_read_unlock();
+ return;
+ }
+ rcu_read_unlock();
+ if (!retain_dentry(dentry))
+ __dput_to_list(dentry, list);
+ spin_unlock(&dentry->d_lock);
+}
/* This must be called with d_lock held */
static inline void __dget_dlock(struct dentry *dentry)
@@ -1067,7 +1093,7 @@ out:
return false;
}
-static void shrink_dentry_list(struct list_head *list)
+void shrink_dentry_list(struct list_head *list)
{
while (!list_empty(list)) {
struct dentry *dentry, *parent;
@@ -1089,18 +1115,9 @@ static void shrink_dentry_list(struct list_head *list)
rcu_read_unlock();
d_shrink_del(dentry);
parent = dentry->d_parent;
+ if (parent != dentry)
+ __dput_to_list(parent, list);
__dentry_kill(dentry);
- if (parent == dentry)
- continue;
- /*
- * We need to prune ancestors too. This is necessary to prevent
- * quadratic behavior of shrink_dcache_parent(), but is also
- * expected to be beneficial in reducing dentry cache
- * fragmentation.
- */
- dentry = parent;
- while (dentry && !lockref_put_or_lock(&dentry->d_lockref))
- dentry = dentry_kill(dentry);
}
}
@@ -1445,8 +1462,11 @@ out:
struct select_data {
struct dentry *start;
+ union {
+ long found;
+ struct dentry *victim;
+ };
struct list_head dispose;
- int found;
};
static enum d_walk_ret select_collect(void *_data, struct dentry *dentry)
@@ -1478,6 +1498,37 @@ out:
return ret;
}
+static enum d_walk_ret select_collect2(void *_data, struct dentry *dentry)
+{
+ struct select_data *data = _data;
+ enum d_walk_ret ret = D_WALK_CONTINUE;
+
+ if (data->start == dentry)
+ goto out;
+
+ if (dentry->d_flags & DCACHE_SHRINK_LIST) {
+ if (!dentry->d_lockref.count) {
+ rcu_read_lock();
+ data->victim = dentry;
+ return D_WALK_QUIT;
+ }
+ } else {
+ if (dentry->d_flags & DCACHE_LRU_LIST)
+ d_lru_del(dentry);
+ if (!dentry->d_lockref.count)
+ d_shrink_add(dentry, &data->dispose);
+ }
+ /*
+ * We can return to the caller if we have found some (this
+ * ensures forward progress). We'll be coming back to find
+ * the rest.
+ */
+ if (!list_empty(&data->dispose))
+ ret = need_resched() ? D_WALK_QUIT : D_WALK_NORETRY;
+out:
+ return ret;
+}
+
/**
* shrink_dcache_parent - prune dcache
* @parent: parent of entries to prune
@@ -1487,12 +1538,9 @@ out:
void shrink_dcache_parent(struct dentry *parent)
{
for (;;) {
- struct select_data data;
+ struct select_data data = {.start = parent};
INIT_LIST_HEAD(&data.dispose);
- data.start = parent;
- data.found = 0;
-
d_walk(parent, &data, select_collect);
if (!list_empty(&data.dispose)) {
@@ -1503,6 +1551,24 @@ void shrink_dcache_parent(struct dentry *parent)
cond_resched();
if (!data.found)
break;
+ data.victim = NULL;
+ d_walk(parent, &data, select_collect2);
+ if (data.victim) {
+ struct dentry *parent;
+ spin_lock(&data.victim->d_lock);
+ if (!shrink_lock_dentry(data.victim)) {
+ spin_unlock(&data.victim->d_lock);
+ rcu_read_unlock();
+ } else {
+ rcu_read_unlock();
+ parent = data.victim->d_parent;
+ if (parent != data.victim)
+ __dput_to_list(parent, &data.dispose);
+ __dentry_kill(data.victim);
+ }
+ }
+ if (!list_empty(&data.dispose))
+ shrink_dentry_list(&data.dispose);
}
}
EXPORT_SYMBOL(shrink_dcache_parent);
diff --git a/fs/fs_pin.c b/fs/fs_pin.c
index a6497cf8ae53..47ef3c71ce90 100644
--- a/fs/fs_pin.c
+++ b/fs/fs_pin.c
@@ -19,20 +19,14 @@ void pin_remove(struct fs_pin *pin)
spin_unlock_irq(&pin->wait.lock);
}
-void pin_insert_group(struct fs_pin *pin, struct vfsmount *m, struct hlist_head *p)
+void pin_insert(struct fs_pin *pin, struct vfsmount *m)
{
spin_lock(&pin_lock);
- if (p)
- hlist_add_head(&pin->s_list, p);
+ hlist_add_head(&pin->s_list, &m->mnt_sb->s_pins);
hlist_add_head(&pin->m_list, &real_mount(m)->mnt_pins);
spin_unlock(&pin_lock);
}
-void pin_insert(struct fs_pin *pin, struct vfsmount *m)
-{
- pin_insert_group(pin, m, &m->mnt_sb->s_pins);
-}
-
void pin_kill(struct fs_pin *p)
{
wait_queue_entry_t wait;
diff --git a/fs/internal.h b/fs/internal.h
index ff5173212803..315fcd8d237c 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -157,6 +157,8 @@ extern long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc);
extern struct dentry *d_alloc_cursor(struct dentry *);
extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *);
extern char *simple_dname(struct dentry *, char *, int);
+extern void dput_to_list(struct dentry *, struct list_head *);
+extern void shrink_dentry_list(struct list_head *);
/*
* read_write.c
diff --git a/fs/mount.h b/fs/mount.h
index 6250de544760..711a4093e475 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -58,7 +58,10 @@ struct mount {
struct mount *mnt_master; /* slave is on master->mnt_slave_list */
struct mnt_namespace *mnt_ns; /* containing namespace */
struct mountpoint *mnt_mp; /* where is it mounted */
- struct hlist_node mnt_mp_list; /* list mounts with the same mountpoint */
+ union {
+ struct hlist_node mnt_mp_list; /* list mounts with the same mountpoint */
+ struct hlist_node mnt_umount;
+ };
struct list_head mnt_umounting; /* list entry for umount propagation */
#ifdef CONFIG_FSNOTIFY
struct fsnotify_mark_connector __rcu *mnt_fsnotify_marks;
@@ -68,8 +71,7 @@ struct mount {
int mnt_group_id; /* peer group identifier */
int mnt_expiry_mark; /* true if marked for expiry */
struct hlist_head mnt_pins;
- struct fs_pin mnt_umount;
- struct dentry *mnt_ex_mountpoint;
+ struct hlist_head mnt_stuck_children;
} __randomize_layout;
#define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */
diff --git a/fs/namespace.c b/fs/namespace.c
index f0d664adb9ba..6464ea4acba9 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -70,6 +70,8 @@ static struct hlist_head *mount_hashtable __read_mostly;
static struct hlist_head *mountpoint_hashtable __read_mostly;
static struct kmem_cache *mnt_cache __read_mostly;
static DECLARE_RWSEM(namespace_sem);
+static HLIST_HEAD(unmounted); /* protected by namespace_sem */
+static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
/* /sys/fs */
struct kobject *fs_kobj;
@@ -170,14 +172,6 @@ unsigned int mnt_get_count(struct mount *mnt)
#endif
}
-static void drop_mountpoint(struct fs_pin *p)
-{
- struct mount *m = container_of(p, struct mount, mnt_umount);
- dput(m->mnt_ex_mountpoint);
- pin_remove(p);
- mntput(&m->mnt);
-}
-
static struct mount *alloc_vfsmnt(const char *name)
{
struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
@@ -215,7 +209,7 @@ static struct mount *alloc_vfsmnt(const char *name)
INIT_LIST_HEAD(&mnt->mnt_slave);
INIT_HLIST_NODE(&mnt->mnt_mp_list);
INIT_LIST_HEAD(&mnt->mnt_umounting);
- init_fs_pin(&mnt->mnt_umount, drop_mountpoint);
+ INIT_HLIST_HEAD(&mnt->mnt_stuck_children);
}
return mnt;
@@ -740,7 +734,7 @@ mountpoint:
/* Add the new mountpoint to the hash table */
read_seqlock_excl(&mount_lock);
- new->m_dentry = dentry;
+ new->m_dentry = dget(dentry);
new->m_count = 1;
hlist_add_head(&new->m_hash, mp_hash(dentry));
INIT_HLIST_HEAD(&new->m_list);
@@ -753,7 +747,11 @@ done:
return mp;
}
-static void put_mountpoint(struct mountpoint *mp)
+/*
+ * vfsmount lock must be held. Additionally, the caller is responsible
+ * for serializing calls for given disposal list.
+ */
+static void __put_mountpoint(struct mountpoint *mp, struct list_head *list)
{
if (!--mp->m_count) {
struct dentry *dentry = mp->m_dentry;
@@ -761,11 +759,18 @@ static void put_mountpoint(struct mountpoint *mp)
spin_lock(&dentry->d_lock);
dentry->d_flags &= ~DCACHE_MOUNTED;
spin_unlock(&dentry->d_lock);
+ dput_to_list(dentry, list);
hlist_del(&mp->m_hash);
kfree(mp);
}
}
+/* called with namespace_lock and vfsmount lock */
+static void put_mountpoint(struct mountpoint *mp)
+{
+ __put_mountpoint(mp, &ex_mountpoints);
+}
+
static inline int check_mnt(struct mount *mnt)
{
return mnt->mnt_ns == current->nsproxy->mnt_ns;
@@ -796,25 +801,17 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns)
/*
* vfsmount lock must be held for write
*/
-static void unhash_mnt(struct mount *mnt)
+static struct mountpoint *unhash_mnt(struct mount *mnt)
{
+ struct mountpoint *mp;
mnt->mnt_parent = mnt;
mnt->mnt_mountpoint = mnt->mnt.mnt_root;
list_del_init(&mnt->mnt_child);
hlist_del_init_rcu(&mnt->mnt_hash);
hlist_del_init(&mnt->mnt_mp_list);
- put_mountpoint(mnt->mnt_mp);
+ mp = mnt->mnt_mp;
mnt->mnt_mp = NULL;
-}
-
-/*
- * vfsmount lock must be held for write
- */
-static void detach_mnt(struct mount *mnt, struct path *old_path)
-{
- old_path->dentry = mnt->mnt_mountpoint;
- old_path->mnt = &mnt->mnt_parent->mnt;
- unhash_mnt(mnt);
+ return mp;
}
/*
@@ -822,9 +819,7 @@ static void detach_mnt(struct mount *mnt, struct path *old_path)
*/
static void umount_mnt(struct mount *mnt)
{
- /* old mountpoint will be dropped when we can do that */
- mnt->mnt_ex_mountpoint = mnt->mnt_mountpoint;
- unhash_mnt(mnt);
+ put_mountpoint(unhash_mnt(mnt));
}
/*
@@ -836,7 +831,7 @@ void mnt_set_mountpoint(struct mount *mnt,
{
mp->m_count++;
mnt_add_count(mnt, 1); /* essentially, that's mntget */
- child_mnt->mnt_mountpoint = dget(mp->m_dentry);
+ child_mnt->mnt_mountpoint = mp->m_dentry;
child_mnt->mnt_parent = mnt;
child_mnt->mnt_mp = mp;
hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
@@ -863,7 +858,6 @@ static void attach_mnt(struct mount *mnt,
void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt)
{
struct mountpoint *old_mp = mnt->mnt_mp;
- struct dentry *old_mountpoint = mnt->mnt_mountpoint;
struct mount *old_parent = mnt->mnt_parent;
list_del_init(&mnt->mnt_child);
@@ -873,22 +867,6 @@ void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct m
attach_mnt(mnt, parent, mp);
put_mountpoint(old_mp);
-
- /*
- * Safely avoid even the suggestion this code might sleep or
- * lock the mount hash by taking advantage of the knowledge that
- * mnt_change_mountpoint will not release the final reference
- * to a mountpoint.
- *
- * During mounting, the mount passed in as the parent mount will
- * continue to use the old mountpoint and during unmounting, the
- * old mountpoint will continue to exist until namespace_unlock,
- * which happens well after mnt_change_mountpoint.
- */
- spin_lock(&old_mountpoint->d_lock);
- old_mountpoint->d_lockref.count--;
- spin_unlock(&old_mountpoint->d_lock);
-
mnt_add_count(old_parent, -1);
}
@@ -1103,19 +1081,22 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
static void cleanup_mnt(struct mount *mnt)
{
+ struct hlist_node *p;
+ struct mount *m;
/*
- * This probably indicates that somebody messed
- * up a mnt_want/drop_write() pair. If this
- * happens, the filesystem was probably unable
- * to make r/w->r/o transitions.
- */
- /*
+ * The warning here probably indicates that somebody messed
+ * up a mnt_want/drop_write() pair. If this happens, the
+ * filesystem was probably unable to make r/w->r/o transitions.
* The locking used to deal with mnt_count decrement provides barriers,
* so mnt_get_writers() below is safe.
*/
WARN_ON(mnt_get_writers(mnt));
if (unlikely(mnt->mnt_pins.first))
mnt_pin_kill(mnt);
+ hlist_for_each_entry_safe(m, p, &mnt->mnt_stuck_children, mnt_umount) {
+ hlist_del(&m->mnt_umount);
+ mntput(&m->mnt);
+ }
fsnotify_vfsmount_delete(&mnt->mnt);
dput(mnt->mnt.mnt_root);
deactivate_super(mnt->mnt.mnt_sb);
@@ -1141,6 +1122,8 @@ static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
static void mntput_no_expire(struct mount *mnt)
{
+ LIST_HEAD(list);
+
rcu_read_lock();
if (likely(READ_ONCE(mnt->mnt_ns))) {
/*
@@ -1181,10 +1164,12 @@ static void mntput_no_expire(struct mount *mnt)
if (unlikely(!list_empty(&mnt->mnt_mounts))) {
struct mount *p, *tmp;
list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) {
- umount_mnt(p);
+ __put_mountpoint(unhash_mnt(p), &list);
+ hlist_add_head(&p->mnt_umount, &mnt->mnt_stuck_children);
}
}
unlock_mount_hash();
+ shrink_dentry_list(&list);
if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) {
struct task_struct *task = current;
@@ -1370,22 +1355,29 @@ int may_umount(struct vfsmount *mnt)
EXPORT_SYMBOL(may_umount);
-static HLIST_HEAD(unmounted); /* protected by namespace_sem */
-
static void namespace_unlock(void)
{
struct hlist_head head;
+ struct hlist_node *p;
+ struct mount *m;
+ LIST_HEAD(list);
hlist_move_list(&unmounted, &head);
+ list_splice_init(&ex_mountpoints, &list);
up_write(&namespace_sem);
+ shrink_dentry_list(&list);
+
if (likely(hlist_empty(&head)))
return;
synchronize_rcu_expedited();
- group_pin_kill(&head);
+ hlist_for_each_entry_safe(m, p, &head, mnt_umount) {
+ hlist_del(&m->mnt_umount);
+ mntput(&m->mnt);
+ }
}
static inline void namespace_lock(void)
@@ -1472,8 +1464,6 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
disconnect = disconnect_mount(p, how);
- pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt,
- disconnect ? &unmounted : NULL);
if (mnt_has_parent(p)) {
mnt_add_count(p->mnt_parent, -1);
if (!disconnect) {
@@ -1481,6 +1471,7 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts);
} else {
umount_mnt(p);
+ hlist_add_head(&p->mnt_umount, &unmounted);
}
}
change_mnt_propagation(p, MS_PRIVATE);
@@ -1626,15 +1617,15 @@ void __detach_mounts(struct dentry *dentry)
namespace_lock();
lock_mount_hash();
mp = lookup_mountpoint(dentry);
- if (IS_ERR_OR_NULL(mp))
+ if (!mp)
goto out_unlock;
event++;
while (!hlist_empty(&mp->m_list)) {
mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
if (mnt->mnt.mnt_flags & MNT_UMOUNT) {
- hlist_add_head(&mnt->mnt_umount.s_list, &unmounted);
umount_mnt(mnt);
+ hlist_add_head(&mnt->mnt_umount, &unmounted);
}
else umount_tree(mnt, UMOUNT_CONNECTED);
}
@@ -2046,7 +2037,7 @@ int count_mounts(struct mnt_namespace *ns, struct mount *mnt)
static int attach_recursive_mnt(struct mount *source_mnt,
struct mount *dest_mnt,
struct mountpoint *dest_mp,
- struct path *parent_path)
+ bool moving)
{
struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
HLIST_HEAD(tree_list);
@@ -2064,7 +2055,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
return PTR_ERR(smp);
/* Is there space to add these mounts to the mount namespace? */
- if (!parent_path) {
+ if (!moving) {
err = count_mounts(ns, source_mnt);
if (err)
goto out;
@@ -2083,8 +2074,8 @@ static int attach_recursive_mnt(struct mount *source_mnt,
} else {
lock_mount_hash();
}
- if (parent_path) {
- detach_mnt(source_mnt, parent_path);
+ if (moving) {
+ unhash_mnt(source_mnt);
attach_mnt(source_mnt, dest_mnt, dest_mp);
touch_mnt_namespace(source_mnt->mnt_ns);
} else {
@@ -2182,7 +2173,7 @@ static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
d_is_dir(mnt->mnt.mnt_root))
return -ENOTDIR;
- return attach_recursive_mnt(mnt, p, mp, NULL);
+ return attach_recursive_mnt(mnt, p, mp, false);
}
/*
@@ -2575,11 +2566,11 @@ out:
static int do_move_mount(struct path *old_path, struct path *new_path)
{
- struct path parent_path = {.mnt = NULL, .dentry = NULL};
struct mnt_namespace *ns;
struct mount *p;
struct mount *old;
- struct mountpoint *mp;
+ struct mount *parent;
+ struct mountpoint *mp, *old_mp;
int err;
bool attached;
@@ -2589,7 +2580,9 @@ static int do_move_mount(struct path *old_path, struct path *new_path)
old = real_mount(old_path->mnt);
p = real_mount(new_path->mnt);
+ parent = old->mnt_parent;
attached = mnt_has_parent(old);
+ old_mp = old->mnt_mp;
ns = old->mnt_ns;
err = -EINVAL;
@@ -2617,7 +2610,7 @@ static int do_move_mount(struct path *old_path, struct path *new_path)
/*
* Don't move a mount residing in a shared parent.
*/
- if (attached && IS_MNT_SHARED(old->mnt_parent))
+ if (attached && IS_MNT_SHARED(parent))
goto out;
/*
* Don't move a mount tree containing unbindable mounts to a destination
@@ -2633,18 +2626,21 @@ static int do_move_mount(struct path *old_path, struct path *new_path)
goto out;
err = attach_recursive_mnt(old, real_mount(new_path->mnt), mp,
- attached ? &parent_path : NULL);
+ attached);
if (err)
goto out;
/* if the mount is moved, it should no longer be expire
* automatically */
list_del_init(&old->mnt_expire);
+ if (attached)
+ put_mountpoint(old_mp);
out:
unlock_mount(mp);
if (!err) {
- path_put(&parent_path);
- if (!attached)
+ if (attached)
+ mntput_no_expire(parent);
+ else
free_mnt_ns(ns);
}
return err;
@@ -3589,8 +3585,8 @@ EXPORT_SYMBOL(path_is_under);
SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
const char __user *, put_old)
{
- struct path new, old, parent_path, root_parent, root;
- struct mount *new_mnt, *root_mnt, *old_mnt;
+ struct path new, old, root;
+ struct mount *new_mnt, *root_mnt, *old_mnt, *root_parent, *ex_parent;
struct mountpoint *old_mp, *root_mp;
int error;
@@ -3619,9 +3615,11 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
new_mnt = real_mount(new.mnt);
root_mnt = real_mount(root.mnt);
old_mnt = real_mount(old.mnt);
+ ex_parent = new_mnt->mnt_parent;
+ root_parent = root_mnt->mnt_parent;
if (IS_MNT_SHARED(old_mnt) ||
- IS_MNT_SHARED(new_mnt->mnt_parent) ||
- IS_MNT_SHARED(root_mnt->mnt_parent))
+ IS_MNT_SHARED(ex_parent) ||
+ IS_MNT_SHARED(root_parent))
goto out4;
if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
goto out4;
@@ -3638,7 +3636,6 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
goto out4; /* not a mountpoint */
if (!mnt_has_parent(root_mnt))
goto out4; /* not attached */
- root_mp = root_mnt->mnt_mp;
if (new.mnt->mnt_root != new.dentry)
goto out4; /* not a mountpoint */
if (!mnt_has_parent(new_mnt))
@@ -3649,10 +3646,9 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
/* make certain new is below the root */
if (!is_path_reachable(new_mnt, new.dentry, &root))
goto out4;
- root_mp->m_count++; /* pin it so it won't go away */
lock_mount_hash();
- detach_mnt(new_mnt, &parent_path);
- detach_mnt(root_mnt, &root_parent);
+ umount_mnt(new_mnt);
+ root_mp = unhash_mnt(root_mnt); /* we'll need its mountpoint */
if (root_mnt->mnt.mnt_flags & MNT_LOCKED) {
new_mnt->mnt.mnt_flags |= MNT_LOCKED;
root_mnt->mnt.mnt_flags &= ~MNT_LOCKED;
@@ -3660,7 +3656,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
/* mount old root on put_old */
attach_mnt(root_mnt, old_mnt, old_mp);
/* mount new_root on / */
- attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp);
+ attach_mnt(new_mnt, root_parent, root_mp);
+ mnt_add_count(root_parent, -1);
touch_mnt_namespace(current->nsproxy->mnt_ns);
/* A moved mount should not expire automatically */
list_del_init(&new_mnt->mnt_expire);
@@ -3670,10 +3667,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
error = 0;
out4:
unlock_mount(old_mp);
- if (!error) {
- path_put(&root_parent);
- path_put(&parent_path);
- }
+ if (!error)
+ mntput_no_expire(ex_parent);
out3:
path_put(&root);
out2:
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 3683d2b1cc8e..628631e2e34f 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -457,10 +457,8 @@ int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
struct dentry *pd_dentry;
pd_dentry = dget_parent(dentry);
- if (pd_dentry != NULL) {
- nfs_zap_caches(d_inode(pd_dentry));
- dput(pd_dentry);
- }
+ nfs_zap_caches(d_inode(pd_dentry));
+ dput(pd_dentry);
}
nfs_free_fattr(res.fattr);
if (error < 0)
diff --git a/include/Kbuild b/include/Kbuild
index 7e9f1acb9dd5..c38f0d46b267 100644
--- a/include/Kbuild
+++ b/include/Kbuild
@@ -31,7 +31,7 @@ header-test- += acpi/platform/acintel.h
header-test- += acpi/platform/aclinux.h
header-test- += acpi/platform/aclinuxex.h
header-test- += acpi/processor.h
-header-test- += clocksource/hyperv_timer.h
+header-test-$(CONFIG_X86) += clocksource/hyperv_timer.h
header-test- += clocksource/timer-sp804.h
header-test- += crypto/cast_common.h
header-test- += crypto/internal/cryptouser.h
@@ -246,6 +246,7 @@ header-test- += linux/intel-pti.h
header-test- += linux/intel-svm.h
header-test- += linux/interconnect-provider.h
header-test- += linux/ioc3.h
+header-test-$(CONFIG_BLOCK) += linux/iomap.h
header-test- += linux/ipack.h
header-test- += linux/irq_cpustat.h
header-test- += linux/irq_poll.h
@@ -454,9 +455,6 @@ header-test- += linux/phy/omap_control_phy.h
header-test- += linux/phy/tegra/xusb.h
header-test- += linux/phy/ulpi_phy.h
header-test- += linux/phy_fixed.h
-header-test- += linux/pinctrl/pinconf-generic.h
-header-test- += linux/pinctrl/pinconf.h
-header-test- += linux/pinctrl/pinctrl.h
header-test- += linux/pipe_fs_i.h
header-test- += linux/pktcdvd.h
header-test- += linux/pl320-ipc.h
@@ -905,10 +903,11 @@ header-test- += net/netfilter/nf_nat_redirect.h
header-test- += net/netfilter/nf_queue.h
header-test- += net/netfilter/nf_reject.h
header-test- += net/netfilter/nf_synproxy.h
-header-test- += net/netfilter/nf_tables.h
-header-test- += net/netfilter/nf_tables_core.h
-header-test- += net/netfilter/nf_tables_ipv4.h
+header-test-$(CONFIG_NF_TABLES) += net/netfilter/nf_tables.h
+header-test-$(CONFIG_NF_TABLES) += net/netfilter/nf_tables_core.h
+header-test-$(CONFIG_NF_TABLES) += net/netfilter/nf_tables_ipv4.h
header-test- += net/netfilter/nf_tables_ipv6.h
+header-test-$(CONFIG_NF_TABLES) += net/netfilter/nf_tables_offload.h
header-test- += net/netfilter/nft_fib.h
header-test- += net/netfilter/nft_meta.h
header-test- += net/netfilter/nft_reject.h
@@ -949,7 +948,6 @@ header-test- += pcmcia/ds.h
header-test- += rdma/ib.h
header-test- += rdma/iw_portmap.h
header-test- += rdma/opa_port_info.h
-header-test- += rdma/rdma_counter.h
header-test- += rdma/rdmavt_cq.h
header-test- += rdma/restrack.h
header-test- += rdma/signature.h
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index e8579412ad21..d7ee4c6bad48 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -170,3 +170,5 @@
#else
#define __diag_GCC_8(s)
#endif
+
+#define __no_fgcse __attribute__((optimize("-fno-gcse")))
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 8aaf7cd026b0..f0fd5636fddb 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -116,9 +116,14 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
".pushsection .discard.unreachable\n\t" \
".long 999b - .\n\t" \
".popsection\n\t"
+
+/* Annotate a C jump table to allow objtool to follow the code flow */
+#define __annotate_jump_table __section(".rodata..c_jump_table")
+
#else
#define annotate_reachable()
#define annotate_unreachable()
+#define __annotate_jump_table
#endif
#ifndef ASM_UNREACHABLE
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 095d55c3834d..599c27b56c29 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -189,6 +189,10 @@ struct ftrace_likely_data {
#define asm_volatile_goto(x...) asm goto(x)
#endif
+#ifndef __no_fgcse
+# define __no_fgcse
+#endif
+
/* Are two types/vars the same type (ignoring qualifiers)? */
#define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index b7338702592a..adf993a3bd58 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -32,6 +32,15 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
}
#endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */
+#ifdef CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED
+bool force_dma_unencrypted(struct device *dev);
+#else
+static inline bool force_dma_unencrypted(struct device *dev)
+{
+ return false;
+}
+#endif /* CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED */
+
/*
* If memory encryption is supported, phys_to_dma will set the memory encryption
* bit in the DMA address, and dma_to_phys will clear it. The raw __phys_to_dma
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 8d13e28a8e07..e11b115dd0e4 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -679,6 +679,20 @@ static inline int dma_coerce_mask_and_coherent(struct device *dev, u64 mask)
return dma_set_mask_and_coherent(dev, mask);
}
+/**
+ * dma_addressing_limited - return if the device is addressing limited
+ * @dev: device to check
+ *
+ * Return %true if the devices DMA mask is too small to address all memory in
+ * the system, else %false. Lack of addressing bits is the prime reason for
+ * bounce buffering, but might not be the only one.
+ */
+static inline bool dma_addressing_limited(struct device *dev)
+{
+ return min_not_zero(*dev->dma_mask, dev->bus_dma_mask) <
+ dma_get_required_mask(dev);
+}
+
#ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
const struct iommu_ops *iommu, bool coherent);
diff --git a/include/linux/fs_pin.h b/include/linux/fs_pin.h
index 7cab74d66f85..bdd09fd2520c 100644
--- a/include/linux/fs_pin.h
+++ b/include/linux/fs_pin.h
@@ -20,6 +20,5 @@ static inline void init_fs_pin(struct fs_pin *p, void (*kill)(struct fs_pin *))
}
void pin_remove(struct fs_pin *);
-void pin_insert_group(struct fs_pin *, struct vfsmount *, struct hlist_head *);
void pin_insert(struct fs_pin *, struct vfsmount *);
void pin_kill(struct fs_pin *);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c5da875f19e3..5c5b5867024c 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -318,6 +318,7 @@ struct kvm_vcpu {
} spin_loop;
#endif
bool preempted;
+ bool ready;
struct kvm_vcpu_arch arch;
struct dentry *debugfs_dentry;
};
diff --git a/include/linux/msi.h b/include/linux/msi.h
index d48e919d55ae..8ad679e9d9c0 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -64,6 +64,10 @@ struct ti_sci_inta_msi_desc {
* @msg: The last set MSI message cached for reuse
* @affinity: Optional pointer to a cpu affinity mask for this descriptor
*
+ * @write_msi_msg: Callback that may be called when the MSI message
+ * address or data changes
+ * @write_msi_msg_data: Data parameter for the callback.
+ *
* @masked: [PCI MSI/X] Mask bits
* @is_msix: [PCI MSI/X] True if MSI-X
* @multiple: [PCI MSI/X] log2 num of messages allocated
@@ -90,6 +94,9 @@ struct msi_desc {
const void *iommu_cookie;
#endif
+ void (*write_msi_msg)(struct msi_desc *entry, void *data);
+ void *write_msi_msg_data;
+
union {
/* PCI MSI/X specific data */
struct {
@@ -100,6 +107,7 @@ struct msi_desc {
u8 multi_cap : 3;
u8 maskbit : 1;
u8 is_64 : 1;
+ u8 is_virtual : 1;
u16 entry_nr;
unsigned default_irq;
} msi_attrib;
diff --git a/include/linux/ntb.h b/include/linux/ntb.h
index 56a92e3ae3ae..8c13538aeffe 100644
--- a/include/linux/ntb.h
+++ b/include/linux/ntb.h
@@ -58,9 +58,11 @@
#include <linux/completion.h>
#include <linux/device.h>
+#include <linux/interrupt.h>
struct ntb_client;
struct ntb_dev;
+struct ntb_msi;
struct pci_dev;
/**
@@ -205,7 +207,7 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops)
}
/**
- * struct ntb_ctx_ops - ntb device operations
+ * struct ntb_dev_ops - ntb device operations
* @port_number: See ntb_port_number().
* @peer_port_count: See ntb_peer_port_count().
* @peer_port_number: See ntb_peer_port_number().
@@ -404,7 +406,7 @@ struct ntb_client {
#define drv_ntb_client(__drv) container_of((__drv), struct ntb_client, drv)
/**
- * struct ntb_device - ntb device
+ * struct ntb_dev - ntb device
* @dev: Linux device object.
* @pdev: PCI device entry of the ntb.
* @topo: Detected topology of the ntb.
@@ -426,6 +428,10 @@ struct ntb_dev {
spinlock_t ctx_lock;
/* block unregister until device is fully released */
struct completion released;
+
+#ifdef CONFIG_NTB_MSI
+ struct ntb_msi *msi;
+#endif
};
#define dev_ntb(__dev) container_of((__dev), struct ntb_dev, dev)
@@ -616,7 +622,6 @@ static inline int ntb_port_number(struct ntb_dev *ntb)
return ntb->ops->port_number(ntb);
}
-
/**
* ntb_peer_port_count() - get the number of peer device ports
* @ntb: NTB device context.
@@ -654,6 +659,58 @@ static inline int ntb_peer_port_number(struct ntb_dev *ntb, int pidx)
}
/**
+ * ntb_logical_port_number() - get the logical port number of the local port
+ * @ntb: NTB device context.
+ *
+ * The Logical Port Number is defined to be a unique number for each
+ * port starting from zero through to the number of ports minus one.
+ * This is in contrast to the Port Number where each port can be assigned
+ * any unique physical number by the hardware.
+ *
+ * The logical port number is useful for calculating the resource indexes
+ * used by peers.
+ *
+ * Return: the logical port number or negative value indicating an error
+ */
+static inline int ntb_logical_port_number(struct ntb_dev *ntb)
+{
+ int lport = ntb_port_number(ntb);
+ int pidx;
+
+ if (lport < 0)
+ return lport;
+
+ for (pidx = 0; pidx < ntb_peer_port_count(ntb); pidx++)
+ if (lport <= ntb_peer_port_number(ntb, pidx))
+ return pidx;
+
+ return pidx;
+}
+
+/**
+ * ntb_peer_logical_port_number() - get the logical peer port by given index
+ * @ntb: NTB device context.
+ * @pidx: Peer port index.
+ *
+ * The Logical Port Number is defined to be a unique number for each
+ * port starting from zero through to the number of ports minus one.
+ * This is in contrast to the Port Number where each port can be assigned
+ * any unique physical number by the hardware.
+ *
+ * The logical port number is useful for calculating the resource indexes
+ * used by peers.
+ *
+ * Return: the peer's logical port number or negative value indicating an error
+ */
+static inline int ntb_peer_logical_port_number(struct ntb_dev *ntb, int pidx)
+{
+ if (ntb_peer_port_number(ntb, pidx) < ntb_port_number(ntb))
+ return pidx;
+ else
+ return pidx + 1;
+}
+
+/**
* ntb_peer_port_idx() - get the peer device port index by given port number
* @ntb: NTB device context.
* @port: Peer port number.
@@ -1506,4 +1563,141 @@ static inline int ntb_peer_msg_write(struct ntb_dev *ntb, int pidx, int midx,
return ntb->ops->peer_msg_write(ntb, pidx, midx, msg);
}
+/**
+ * ntb_peer_resource_idx() - get a resource index for a given peer idx
+ * @ntb: NTB device context.
+ * @pidx: Peer port index.
+ *
+ * When constructing a graph of peers, each remote peer must use a different
+ * resource index (mw, doorbell, etc) to communicate with each other
+ * peer.
+ *
+ * In a two peer system, this function should always return 0 such that
+ * resource 0 points to the remote peer on both ports.
+ *
+ * In a 5 peer system, this function will return the following matrix
+ *
+ * pidx \ port 0 1 2 3 4
+ * 0 0 0 1 2 3
+ * 1 0 1 1 2 3
+ * 2 0 1 2 2 3
+ * 3 0 1 2 3 3
+ *
+ * For example, if this function is used to program peer's memory
+ * windows, port 0 will program MW 0 on all it's peers to point to itself.
+ * port 1 will program MW 0 in port 0 to point to itself and MW 1 on all
+ * other ports. etc.
+ *
+ * For the legacy two host case, ntb_port_number() and ntb_peer_port_number()
+ * both return zero and therefore this function will always return zero.
+ * So MW 0 on each host would be programmed to point to the other host.
+ *
+ * Return: the resource index to use for that peer.
+ */
+static inline int ntb_peer_resource_idx(struct ntb_dev *ntb, int pidx)
+{
+ int local_port, peer_port;
+
+ if (pidx >= ntb_peer_port_count(ntb))
+ return -EINVAL;
+
+ local_port = ntb_logical_port_number(ntb);
+ peer_port = ntb_peer_logical_port_number(ntb, pidx);
+
+ if (peer_port < local_port)
+ return local_port - 1;
+ else
+ return local_port;
+}
+
+/**
+ * ntb_peer_highest_mw_idx() - get a memory window index for a given peer idx
+ * using the highest index memory windows first
+ *
+ * @ntb: NTB device context.
+ * @pidx: Peer port index.
+ *
+ * Like ntb_peer_resource_idx(), except it returns indexes starting with
+ * last memory window index.
+ *
+ * Return: the resource index to use for that peer.
+ */
+static inline int ntb_peer_highest_mw_idx(struct ntb_dev *ntb, int pidx)
+{
+ int ret;
+
+ ret = ntb_peer_resource_idx(ntb, pidx);
+ if (ret < 0)
+ return ret;
+
+ return ntb_mw_count(ntb, pidx) - ret - 1;
+}
+
+struct ntb_msi_desc {
+ u32 addr_offset;
+ u32 data;
+};
+
+#ifdef CONFIG_NTB_MSI
+
+int ntb_msi_init(struct ntb_dev *ntb, void (*desc_changed)(void *ctx));
+int ntb_msi_setup_mws(struct ntb_dev *ntb);
+void ntb_msi_clear_mws(struct ntb_dev *ntb);
+int ntbm_msi_request_threaded_irq(struct ntb_dev *ntb, irq_handler_t handler,
+ irq_handler_t thread_fn,
+ const char *name, void *dev_id,
+ struct ntb_msi_desc *msi_desc);
+void ntbm_msi_free_irq(struct ntb_dev *ntb, unsigned int irq, void *dev_id);
+int ntb_msi_peer_trigger(struct ntb_dev *ntb, int peer,
+ struct ntb_msi_desc *desc);
+int ntb_msi_peer_addr(struct ntb_dev *ntb, int peer,
+ struct ntb_msi_desc *desc,
+ phys_addr_t *msi_addr);
+
+#else /* not CONFIG_NTB_MSI */
+
+static inline int ntb_msi_init(struct ntb_dev *ntb,
+ void (*desc_changed)(void *ctx))
+{
+ return -EOPNOTSUPP;
+}
+static inline int ntb_msi_setup_mws(struct ntb_dev *ntb)
+{
+ return -EOPNOTSUPP;
+}
+static inline void ntb_msi_clear_mws(struct ntb_dev *ntb) {}
+static inline int ntbm_msi_request_threaded_irq(struct ntb_dev *ntb,
+ irq_handler_t handler,
+ irq_handler_t thread_fn,
+ const char *name, void *dev_id,
+ struct ntb_msi_desc *msi_desc)
+{
+ return -EOPNOTSUPP;
+}
+static inline void ntbm_msi_free_irq(struct ntb_dev *ntb, unsigned int irq,
+ void *dev_id) {}
+static inline int ntb_msi_peer_trigger(struct ntb_dev *ntb, int peer,
+ struct ntb_msi_desc *desc)
+{
+ return -EOPNOTSUPP;
+}
+static inline int ntb_msi_peer_addr(struct ntb_dev *ntb, int peer,
+ struct ntb_msi_desc *desc,
+ phys_addr_t *msi_addr)
+{
+ return -EOPNOTSUPP;
+
+}
+
+#endif /* CONFIG_NTB_MSI */
+
+static inline int ntbm_msi_request_irq(struct ntb_dev *ntb,
+ irq_handler_t handler,
+ const char *name, void *dev_id,
+ struct ntb_msi_desc *msi_desc)
+{
+ return ntbm_msi_request_threaded_irq(ntb, handler, NULL, name,
+ dev_id, msi_desc);
+}
+
#endif
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 2972793e3028..9e700d9f9f28 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1412,6 +1412,15 @@ int pci_set_vga_state(struct pci_dev *pdev, bool decode,
#define PCI_IRQ_MSI (1 << 1) /* Allow MSI interrupts */
#define PCI_IRQ_MSIX (1 << 2) /* Allow MSI-X interrupts */
#define PCI_IRQ_AFFINITY (1 << 3) /* Auto-assign affinity */
+
+/*
+ * Virtual interrupts allow for more interrupts to be allocated
+ * than the device has interrupts for. These are not programmed
+ * into the device's MSI-X table and must be handled by some
+ * other driver means.
+ */
+#define PCI_IRQ_VIRTUAL (1 << 4)
+
#define PCI_IRQ_ALL_TYPES \
(PCI_IRQ_LEGACY | PCI_IRQ_MSI | PCI_IRQ_MSIX)
diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h
index b0fb1446fe04..6c8512d3be88 100644
--- a/include/linux/sched/isolation.h
+++ b/include/linux/sched/isolation.h
@@ -19,6 +19,7 @@ enum hk_flags {
DECLARE_STATIC_KEY_FALSE(housekeeping_overridden);
extern int housekeeping_any_cpu(enum hk_flags flags);
extern const struct cpumask *housekeeping_cpumask(enum hk_flags flags);
+extern bool housekeeping_enabled(enum hk_flags flags);
extern void housekeeping_affine(struct task_struct *t, enum hk_flags flags);
extern bool housekeeping_test_cpu(int cpu, enum hk_flags flags);
extern void __init housekeeping_init(void);
@@ -35,6 +36,11 @@ static inline const struct cpumask *housekeeping_cpumask(enum hk_flags flags)
return cpu_possible_mask;
}
+static inline bool housekeeping_enabled(enum hk_flags flags)
+{
+ return false;
+}
+
static inline void housekeeping_affine(struct task_struct *t,
enum hk_flags flags) { }
static inline void housekeeping_init(void) { }
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index a5fcdad4a03e..cc139dbd71e5 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -369,6 +369,8 @@ struct scsi_host_template {
*/
unsigned long dma_boundary;
+ unsigned long virt_boundary_mask;
+
/*
* This specifies "machine infinity" for host templates which don't
* limit the transfer size. Note this limit represents an absolute
@@ -587,6 +589,7 @@ struct Scsi_Host {
unsigned int max_sectors;
unsigned int max_segment_size;
unsigned long dma_boundary;
+ unsigned long virt_boundary_mask;
/*
* In scsi-mq mode, the number of hardware queues supported by the LLD.
*
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index dc0b682ec2d9..fc020c09b7e8 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -35,10 +35,10 @@ config PREEMPT_VOLUNTARY
Select this if you are building a kernel for a desktop system.
-config PREEMPT
+config PREEMPT_LL
bool "Preemptible Kernel (Low-Latency Desktop)"
depends on !ARCH_NO_PREEMPT
- select PREEMPT_COUNT
+ select PREEMPT
select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
help
This option reduces the latency of the kernel by making
@@ -55,7 +55,28 @@ config PREEMPT
embedded system with latency requirements in the milliseconds
range.
+config PREEMPT_RT
+ bool "Fully Preemptible Kernel (Real-Time)"
+ depends on EXPERT && ARCH_SUPPORTS_RT
+ select PREEMPT
+ help
+ This option turns the kernel into a real-time kernel by replacing
+ various locking primitives (spinlocks, rwlocks, etc.) with
+ preemptible priority-inheritance aware variants, enforcing
+ interrupt threading and introducing mechanisms to break up long
+ non-preemptible sections. This makes the kernel, except for very
+ low level and critical code pathes (entry code, scheduler, low
+ level interrupt handling) fully preemptible and brings most
+ execution contexts under scheduler control.
+
+ Select this if you are building a kernel for systems which
+ require real-time guarantees.
+
endchoice
config PREEMPT_COUNT
bool
+
+config PREEMPT
+ bool
+ select PREEMPT_COUNT
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 16079550db6d..8191a7db2777 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1295,11 +1295,11 @@ bool bpf_opcode_in_insntable(u8 code)
*
* Decode and execute eBPF instructions.
*/
-static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
+static u64 __no_fgcse ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
{
#define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y
#define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z
- static const void *jumptable[256] = {
+ static const void * const jumptable[256] __annotate_jump_table = {
[0 ... 255] = &&default_label,
/* Now overwrite non-defaults ... */
BPF_INSN_MAP(BPF_INSN_2_LBL, BPF_INSN_3_LBL),
@@ -1558,7 +1558,6 @@ out:
BUG_ON(1);
return 0;
}
-STACK_FRAME_NON_STANDARD(___bpf_prog_run); /* jump table */
#define PROG_NAME(stack_size) __bpf_prog_run##stack_size
#define DEFINE_BPF_PROG_RUN(stack_size) \
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 70f8f8d9200e..9decbba255fc 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -48,6 +48,9 @@ config ARCH_HAS_DMA_COHERENT_TO_PFN
config ARCH_HAS_DMA_MMAP_PGPROT
bool
+config ARCH_HAS_FORCE_DMA_UNENCRYPTED
+ bool
+
config DMA_NONCOHERENT_CACHE_SYNC
bool
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index b90e1aede743..59bdceea3737 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -23,14 +23,6 @@
#define ARCH_ZONE_DMA_BITS 24
#endif
-/*
- * For AMD SEV all DMA must be to unencrypted addresses.
- */
-static inline bool force_dma_unencrypted(void)
-{
- return sev_active();
-}
-
static void report_addr(struct device *dev, dma_addr_t dma_addr, size_t size)
{
if (!dev->dma_mask) {
@@ -46,7 +38,7 @@ static void report_addr(struct device *dev, dma_addr_t dma_addr, size_t size)
static inline dma_addr_t phys_to_dma_direct(struct device *dev,
phys_addr_t phys)
{
- if (force_dma_unencrypted())
+ if (force_dma_unencrypted(dev))
return __phys_to_dma(dev, phys);
return phys_to_dma(dev, phys);
}
@@ -67,7 +59,7 @@ static gfp_t __dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
if (dev->bus_dma_mask && dev->bus_dma_mask < dma_mask)
dma_mask = dev->bus_dma_mask;
- if (force_dma_unencrypted())
+ if (force_dma_unencrypted(dev))
*phys_mask = __dma_to_phys(dev, dma_mask);
else
*phys_mask = dma_to_phys(dev, dma_mask);
@@ -159,7 +151,7 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
}
ret = page_address(page);
- if (force_dma_unencrypted()) {
+ if (force_dma_unencrypted(dev)) {
set_memory_decrypted((unsigned long)ret, 1 << get_order(size));
*dma_handle = __phys_to_dma(dev, page_to_phys(page));
} else {
@@ -192,7 +184,7 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
return;
}
- if (force_dma_unencrypted())
+ if (force_dma_unencrypted(dev))
set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);
if (IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) &&
@@ -242,12 +234,14 @@ void dma_direct_sync_sg_for_device(struct device *dev,
int i;
for_each_sg(sgl, sg, nents, i) {
- if (unlikely(is_swiotlb_buffer(sg_phys(sg))))
- swiotlb_tbl_sync_single(dev, sg_phys(sg), sg->length,
+ phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));
+
+ if (unlikely(is_swiotlb_buffer(paddr)))
+ swiotlb_tbl_sync_single(dev, paddr, sg->length,
dir, SYNC_FOR_DEVICE);
if (!dev_is_dma_coherent(dev))
- arch_sync_dma_for_device(dev, sg_phys(sg), sg->length,
+ arch_sync_dma_for_device(dev, paddr, sg->length,
dir);
}
}
@@ -279,11 +273,13 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
int i;
for_each_sg(sgl, sg, nents, i) {
+ phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));
+
if (!dev_is_dma_coherent(dev))
- arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir);
-
- if (unlikely(is_swiotlb_buffer(sg_phys(sg))))
- swiotlb_tbl_sync_single(dev, sg_phys(sg), sg->length, dir,
+ arch_sync_dma_for_cpu(dev, paddr, sg->length, dir);
+
+ if (unlikely(is_swiotlb_buffer(paddr)))
+ swiotlb_tbl_sync_single(dev, paddr, sg->length, dir,
SYNC_FOR_CPU);
}
@@ -407,11 +403,9 @@ int dma_direct_supported(struct device *dev, u64 mask)
size_t dma_direct_max_mapping_size(struct device *dev)
{
- size_t size = SIZE_MAX;
-
/* If SWIOTLB is active, use its maximum mapping size */
- if (is_swiotlb_active())
- size = swiotlb_max_mapping_size(dev);
-
- return size;
+ if (is_swiotlb_active() &&
+ (dma_addressing_limited(dev) || swiotlb_force == SWIOTLB_FORCE))
+ return swiotlb_max_mapping_size(dev);
+ return SIZE_MAX;
}
diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
index 123ea07a3f3b..ccb28085b114 100644
--- a/kernel/sched/isolation.c
+++ b/kernel/sched/isolation.c
@@ -14,6 +14,12 @@ EXPORT_SYMBOL_GPL(housekeeping_overridden);
static cpumask_var_t housekeeping_mask;
static unsigned int housekeeping_flags;
+bool housekeeping_enabled(enum hk_flags flags)
+{
+ return !!(housekeeping_flags & flags);
+}
+EXPORT_SYMBOL_GPL(housekeeping_enabled);
+
int housekeeping_any_cpu(enum hk_flags flags)
{
if (static_branch_unlikely(&housekeeping_overridden))
diff --git a/kernel/smp.c b/kernel/smp.c
index 616d4d114847..7dbcb402c2fc 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -291,6 +291,14 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
&& !oops_in_progress);
+ /*
+ * When @wait we can deadlock when we interrupt between llist_add() and
+ * arch_send_call_function_ipi*(); when !@wait we can deadlock due to
+ * csd_lock() on because the interrupt context uses the same csd
+ * storage.
+ */
+ WARN_ON_ONCE(!in_task());
+
csd = &csd_stack;
if (!wait) {
csd = this_cpu_ptr(&csd_data);
@@ -416,6 +424,14 @@ void smp_call_function_many(const struct cpumask *mask,
WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
&& !oops_in_progress && !early_boot_irqs_disabled);
+ /*
+ * When @wait we can deadlock when we interrupt between llist_add() and
+ * arch_send_call_function_ipi*(); when !@wait we can deadlock due to
+ * csd_lock() on because the interrupt context uses the same csd
+ * storage.
+ */
+ WARN_ON_ONCE(!in_task());
+
/* Try to fastpath. So, what's a CPU they want? Ignoring this one. */
cpu = cpumask_first_and(mask, cpu_online_mask);
if (cpu == this_cpu)
diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
index e6a02b274b73..f5440abb7532 100644
--- a/kernel/stacktrace.c
+++ b/kernel/stacktrace.c
@@ -226,12 +226,17 @@ unsigned int stack_trace_save_user(unsigned long *store, unsigned int size)
.store = store,
.size = size,
};
+ mm_segment_t fs;
/* Trace user stack if not a kernel thread */
if (current->flags & PF_KTHREAD)
return 0;
+ fs = get_fs();
+ set_fs(USER_DS);
arch_stack_walk_user(consume_entry, &c, task_pt_regs(current));
+ set_fs(fs);
+
return c.len;
}
#endif
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index bc6673ab3a08..5960e2980a8a 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -353,23 +353,13 @@ config DEBUG_SECTION_MISMATCH
which results in the code/data being placed in specific sections.
The section mismatch analysis is always performed after a full
kernel build, and enabling this option causes the following
- additional steps to occur:
+ additional step to occur:
- Add the option -fno-inline-functions-called-once to gcc commands.
When inlining a function annotated with __init in a non-init
function, we would lose the section information and thus
the analysis would not catch the illegal reference.
This option tells gcc to inline less (but it does result in
a larger kernel).
- - Run the section mismatch analysis for each module/built-in.a file.
- When we run the section mismatch analysis on vmlinux.o, we
- lose valuable information about where the mismatch was
- introduced.
- Running the analysis for each module/built-in.a file
- tells where the mismatch happens much closer to the
- source. The drawback is that the same mismatch is
- reported at least twice.
- - Enable verbose reporting from modpost in order to help resolve
- the section mismatches that are reported.
config SECTION_MISMATCH_WARN_ONLY
bool "Make section mismatch errors non-fatal"
diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index 73e80b917f12..77c742fa4fb1 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -125,11 +125,6 @@ CC_OPTION_CFLAGS = $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS))
cc-option = $(call __cc-option, $(CC),\
$(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS),$(1),$(2))
-# hostcc-option
-# Usage: cflags-y += $(call hostcc-option,-march=winchip-c6,-march=i586)
-hostcc-option = $(call __cc-option, $(HOSTCC),\
- $(KBUILD_HOSTCFLAGS) $(HOST_EXTRACFLAGS),$(1),$(2))
-
# cc-option-yn
# Usage: flag := $(call cc-option-yn,-march=winchip-c6)
cc-option-yn = $(call try-run,\
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index be38198d98b2..0d434d0afc0b 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -63,14 +63,14 @@ ifneq ($(strip $(real-obj-y) $(need-builtin)),)
builtin-target := $(obj)/built-in.a
endif
-ifdef CONFIG_MODULES
+ifeq ($(CONFIG_MODULES)$(need-modorder),y1)
modorder-target := $(obj)/modules.order
endif
-# We keep a list of all modules in $(MODVERDIR)
+mod-targets := $(patsubst %.o, %.mod, $(obj-m))
__build: $(if $(KBUILD_BUILTIN),$(builtin-target) $(lib-target) $(extra-y)) \
- $(if $(KBUILD_MODULES),$(obj-m) $(modorder-target)) \
+ $(if $(KBUILD_MODULES),$(obj-m) $(mod-targets) $(modorder-target)) \
$(subdir-ym) $(always)
@:
@@ -87,11 +87,6 @@ ifneq ($(KBUILD_ENABLE_EXTRA_GCC_CHECKS),)
cmd_checkdoc = $(srctree)/scripts/kernel-doc -none $<
endif
-# Do section mismatch analysis for each module/built-in.a
-ifdef CONFIG_DEBUG_SECTION_MISMATCH
- cmd_secanalysis = ; scripts/mod/modpost $@
-endif
-
# Compile C sources (.c)
# ---------------------------------------------------------------------------
@@ -268,7 +263,7 @@ endef
# List module undefined symbols (or empty line if not enabled)
ifdef CONFIG_TRIM_UNUSED_KSYMS
-cmd_undef_syms = $(NM) $@ | sed -n 's/^ *U //p' | xargs echo
+cmd_undef_syms = $(NM) $< | sed -n 's/^ *U //p' | xargs echo
else
cmd_undef_syms = echo
endif
@@ -278,13 +273,15 @@ $(obj)/%.o: $(src)/%.c $(recordmcount_source) $(objtool_dep) FORCE
$(call cmd,force_checksrc)
$(call if_changed_rule,cc_o_c)
-# Single-part modules are special since we need to mark them in $(MODVERDIR)
+cmd_mod = { \
+ echo $(if $($*-objs)$($*-y)$($*-m), $(addprefix $(obj)/, $($*-objs) $($*-y) $($*-m)), $(@:.mod=.o)); \
+ $(cmd_undef_syms); \
+ } > $@
-$(single-used-m): $(obj)/%.o: $(src)/%.c $(recordmcount_source) $(objtool_dep) FORCE
- $(call cmd,force_checksrc)
- $(call if_changed_rule,cc_o_c)
- @{ echo $(@:.o=.ko); echo $@; \
- $(cmd_undef_syms); } > $(MODVERDIR)/$(@F:.o=.mod)
+$(obj)/%.mod: $(obj)/%.o FORCE
+ $(call if_changed,mod)
+
+targets += $(mod-targets)
quiet_cmd_cc_lst_c = MKLST $@
cmd_cc_lst_c = $(CC) $(c_flags) -g -c -o $*.o $< && \
@@ -294,7 +291,7 @@ quiet_cmd_cc_lst_c = MKLST $@
$(obj)/%.lst: $(src)/%.c FORCE
$(call if_changed_dep,cc_lst_c)
-# header test (header-test-y target)
+# header test (header-test-y, header-test-m target)
# ---------------------------------------------------------------------------
quiet_cmd_cc_s_h = CC $@
@@ -423,13 +420,10 @@ endif # builtin-target
#
# Create commands to either record .ko file or cat modules.order from
# a subdirectory
-modorder-cmds = \
- $(foreach m, $(modorder), \
- $(if $(filter %/modules.order, $m), \
- cat $m;, echo kernel/$m;))
-
$(modorder-target): $(subdir-ym) FORCE
- $(Q)(cat /dev/null; $(modorder-cmds)) > $@
+ $(Q){ $(foreach m, $(modorder), \
+ $(if $(filter %/modules.order, $m), cat $m, echo $m);) :; } \
+ | $(AWK) '!x[$$0]++' - > $@
#
# Rule to compile a set of .o files into one .a file (with symbol table)
@@ -464,12 +458,10 @@ endif
# module is turned into a multi object module, $^ will contain header file
# dependencies recorded in the .*.cmd file.
quiet_cmd_link_multi-m = LD [M] $@
-cmd_link_multi-m = $(LD) $(ld_flags) -r -o $@ $(filter %.o,$^) $(cmd_secanalysis)
+ cmd_link_multi-m = $(LD) $(ld_flags) -r -o $@ $(filter %.o,$^)
$(multi-used-m): FORCE
$(call if_changed,link_multi-m)
- @{ echo $(@:.o=.ko); echo $(filter %.o,$^); \
- $(cmd_undef_syms); } > $(MODVERDIR)/$(@F:.o=.mod)
$(call multi_depend, $(multi-used-m), .o, -objs -y -m)
targets += $(multi-used-m)
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 6cb3aa5cbc79..5241d0751eb0 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -78,7 +78,7 @@ header-test-y += $(filter-out $(header-test-), \
$(wildcard $(addprefix $(srctree)/$(src)/, \
$(header-test-pattern-y)))))
-extra-$(CONFIG_HEADER_TEST) += $(addsuffix .s, $(header-test-y))
+extra-$(CONFIG_HEADER_TEST) += $(addsuffix .s, $(header-test-y) $(header-test-m))
# Add subdir path
diff --git a/scripts/Makefile.modbuiltin b/scripts/Makefile.modbuiltin
index 50a9990760f3..7d4711b88656 100644
--- a/scripts/Makefile.modbuiltin
+++ b/scripts/Makefile.modbuiltin
@@ -40,7 +40,7 @@ __modbuiltin: $(modbuiltin-target) $(subdir-ym)
@:
$(modbuiltin-target): $(subdir-ym) FORCE
- $(Q)(for m in $(modbuiltin-mods); do echo kernel/$$m; done; \
+ $(Q)(for m in $(modbuiltin-mods); do echo $$m; done; \
cat /dev/null $(modbuiltin-subdirs)) > $@
PHONY += FORCE
diff --git a/scripts/Makefile.modinst b/scripts/Makefile.modinst
index 0dae402661f3..5a4579e76485 100644
--- a/scripts/Makefile.modinst
+++ b/scripts/Makefile.modinst
@@ -8,10 +8,7 @@ __modinst:
include scripts/Kbuild.include
-#
-
-__modules := $(sort $(shell grep -h '\.ko$$' /dev/null $(wildcard $(MODVERDIR)/*.mod)))
-modules := $(patsubst %.o,%.ko,$(wildcard $(__modules:.ko=.o)))
+modules := $(sort $(shell cat $(if $(KBUILD_EXTMOD),$(KBUILD_EXTMOD)/)modules.order))
PHONY += $(modules)
__modinst: $(modules)
diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
index fec6ec2ffa47..6b19c1a4eae5 100644
--- a/scripts/Makefile.modpost
+++ b/scripts/Makefile.modpost
@@ -6,11 +6,12 @@
# Stage one of module building created the following:
# a) The individual .o files used for the module
# b) A <module>.o file which is the .o files above linked together
-# c) A <module>.mod file in $(MODVERDIR)/, listing the name of the
-# the preliminary <module>.o file, plus all .o files
+# c) A <module>.mod file, listing the name of the preliminary <module>.o file,
+# plus all .o files
+# d) modules.order, which lists all the modules
# Stage 2 is handled by this file and does the following
-# 1) Find all modules from the files listed in $(MODVERDIR)/
+# 1) Find all modules listed in modules.order
# 2) modpost is then used to
# 3) create one <module>.mod.c file pr. module
# 4) create one Module.symvers file with CRC for all exported symbols
@@ -60,10 +61,12 @@ include scripts/Makefile.lib
kernelsymfile := $(objtree)/Module.symvers
modulesymfile := $(firstword $(KBUILD_EXTMOD))/Module.symvers
-# Step 1), find all modules listed in $(MODVERDIR)/
-MODLISTCMD := find $(MODVERDIR) -name '*.mod' | xargs -r grep -h '\.ko$$' | sort -u
-__modules := $(shell $(MODLISTCMD))
-modules := $(patsubst %.o,%.ko, $(wildcard $(__modules:.ko=.o)))
+modorder := $(if $(KBUILD_EXTMOD),$(KBUILD_EXTMOD)/)modules.order
+
+# Step 1), find all modules listed in modules.order
+ifdef CONFIG_MODULES
+modules := $(sort $(shell cat $(modorder)))
+endif
# Stop after building .o files if NOFINAL is set. Makes compile tests quicker
_modpost: $(if $(KBUILD_MODPOST_NOFINAL), $(modules:.ko:.o),$(modules))
@@ -84,7 +87,7 @@ MODPOST_OPT=$(subst -i,-n,$(filter -i,$(MAKEFLAGS)))
# We can go over command line length here, so be careful.
quiet_cmd_modpost = MODPOST $(words $(filter-out vmlinux FORCE, $^)) modules
- cmd_modpost = $(MODLISTCMD) | sed 's/\.ko$$/.o/' | $(modpost) $(MODPOST_OPT) -s -T -
+ cmd_modpost = sed 's/ko$$/o/' $(modorder) | $(modpost) $(MODPOST_OPT) -s -T -
PHONY += __modpost
__modpost: $(modules:.ko=.o) FORCE
diff --git a/scripts/Makefile.modsign b/scripts/Makefile.modsign
index da56aa78d245..d7325cefe709 100644
--- a/scripts/Makefile.modsign
+++ b/scripts/Makefile.modsign
@@ -8,8 +8,7 @@ __modsign:
include scripts/Kbuild.include
-__modules := $(sort $(shell grep -h '\.ko$$' /dev/null $(wildcard $(MODVERDIR)/*.mod)))
-modules := $(patsubst %.o,%.ko,$(wildcard $(__modules:.ko=.o)))
+modules := $(sort $(shell cat modules.order))
PHONY += $(modules)
__modsign: $(modules)
diff --git a/scripts/adjust_autoksyms.sh b/scripts/adjust_autoksyms.sh
index aab4e299d7a2..a904bf1f5e67 100755
--- a/scripts/adjust_autoksyms.sh
+++ b/scripts/adjust_autoksyms.sh
@@ -8,8 +8,7 @@
#
# Create/update the include/generated/autoksyms.h file from the list
-# of all module's needed symbols as recorded on the third line of
-# .tmp_versions/*.mod files.
+# of all module's needed symbols as recorded on the second line of *.mod files.
#
# For each symbol being added or removed, the corresponding dependency
# file's timestamp is updated to force a rebuild of the affected source
@@ -47,13 +46,10 @@ cat > "$new_ksyms_file" << EOT
*/
EOT
-[ "$(ls -A "$MODVERDIR")" ] &&
-for mod in "$MODVERDIR"/*.mod; do
- sed -n -e '3{s/ /\n/g;/^$/!p;}' "$mod"
-done | sort -u |
-while read sym; do
- echo "#define __KSYM_${sym} 1"
-done >> "$new_ksyms_file"
+sed 's/ko$/mod/' modules.order |
+xargs -n1 sed -n -e '2{s/ /\n/g;/^$/!p;}' -- |
+sort -u |
+sed -e 's/\(.*\)/#define __KSYM_\1 1/' >> "$new_ksyms_file"
# Special case for modversions (see modpost.c)
if [ -n "$CONFIG_MODVERSIONS" ]; then
diff --git a/scripts/coccinelle/api/devm_platform_ioremap_resource.cocci b/scripts/coccinelle/api/devm_platform_ioremap_resource.cocci
new file mode 100644
index 000000000000..56a2e261d61d
--- /dev/null
+++ b/scripts/coccinelle/api/devm_platform_ioremap_resource.cocci
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+/// Use devm_platform_ioremap_resource helper which wraps
+/// platform_get_resource() and devm_ioremap_resource() together.
+///
+// Confidence: High
+// Copyright: (C) 2019 Himanshu Jha GPLv2.
+// Copyright: (C) 2019 Julia Lawall, Inria/LIP6. GPLv2.
+// Keywords: platform_get_resource, devm_ioremap_resource,
+// Keywords: devm_platform_ioremap_resource
+
+virtual patch
+virtual report
+
+@r depends on patch && !report@
+expression e1, e2, arg1, arg2, arg3;
+identifier id;
+@@
+
+(
+- id = platform_get_resource(arg1, IORESOURCE_MEM, arg2);
+|
+- struct resource *id = platform_get_resource(arg1, IORESOURCE_MEM, arg2);
+)
+ ... when != id
+- e1 = devm_ioremap_resource(arg3, id);
++ e1 = devm_platform_ioremap_resource(arg1, arg2);
+ ... when != id
+? id = e2
+
+@r1 depends on patch && !report@
+identifier r.id;
+type T;
+@@
+
+- T *id;
+ ...when != id
+
+@r2 depends on report && !patch@
+identifier id;
+expression e1, e2, arg1, arg2, arg3;
+position j0;
+@@
+
+(
+ id = platform_get_resource(arg1, IORESOURCE_MEM, arg2);
+|
+ struct resource *id = platform_get_resource(arg1, IORESOURCE_MEM, arg2);
+)
+ ... when != id
+ e1@j0 = devm_ioremap_resource(arg3, id);
+ ... when != id
+? id = e2
+
+@script:python depends on report && !patch@
+e1 << r2.e1;
+j0 << r2.j0;
+@@
+
+msg = "WARNING: Use devm_platform_ioremap_resource for %s" % (e1)
+coccilib.report.print_report(j0[0], msg)
diff --git a/scripts/export_report.pl b/scripts/export_report.pl
index 0f604f62f067..7d3030d03a25 100755
--- a/scripts/export_report.pl
+++ b/scripts/export_report.pl
@@ -52,13 +52,12 @@ sub usage {
sub collectcfiles {
my @file;
- while (<.tmp_versions/*.mod>) {
- open my $fh, '<', $_ or die "cannot open $_: $!\n";
- push (@file,
- grep s/\.ko/.mod.c/, # change the suffix
- grep m/.+\.ko/, # find the .ko path
- <$fh>); # lines in opened file
+ open my $fh, '< modules.order' or die "cannot open modules.order: $!\n";
+ while (<$fh>) {
+ s/\.ko$/.mod.c/;
+ push (@file, $_)
}
+ close($fh);
chomp @file;
return @file;
}
diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile
index ab30fe724c43..7656e1137b6b 100644
--- a/scripts/kconfig/Makefile
+++ b/scripts/kconfig/Makefile
@@ -94,7 +94,7 @@ configfiles=$(wildcard $(srctree)/kernel/configs/$@ $(srctree)/arch/$(SRCARCH)/c
%.config: $(obj)/conf
$(if $(call configfiles),, $(error No configuration exists for this target on this architecture))
$(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh -m .config $(configfiles)
- +$(Q)yes "" | $(MAKE) -f $(srctree)/Makefile oldconfig
+ $(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig
PHONY += kvmconfig
kvmconfig: kvm_guest.config
diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c
index 501fdcc5e999..1134892599da 100644
--- a/scripts/kconfig/confdata.c
+++ b/scripts/kconfig/confdata.c
@@ -895,7 +895,8 @@ int conf_write(const char *name)
"# %s\n"
"#\n", str);
need_newline = false;
- } else if (!(sym->flags & SYMBOL_CHOICE)) {
+ } else if (!(sym->flags & SYMBOL_CHOICE) &&
+ !(sym->flags & SYMBOL_WRITTEN)) {
sym_calc_value(sym);
if (!(sym->flags & SYMBOL_WRITE))
goto next;
@@ -903,7 +904,7 @@ int conf_write(const char *name)
fprintf(out, "\n");
need_newline = false;
}
- sym->flags &= ~SYMBOL_WRITE;
+ sym->flags |= SYMBOL_WRITTEN;
conf_write_symbol(out, sym, &kconfig_printer_cb, NULL);
}
@@ -1063,8 +1064,6 @@ int conf_write_autoconf(int overwrite)
if (!overwrite && is_present(autoconf_name))
return 0;
- sym_clear_all_valid();
-
conf_write_dep("include/config/auto.conf.cmd");
if (conf_touch_deps())
diff --git a/scripts/kconfig/expr.h b/scripts/kconfig/expr.h
index 8dde65bc3165..017843c9a4f4 100644
--- a/scripts/kconfig/expr.h
+++ b/scripts/kconfig/expr.h
@@ -141,6 +141,7 @@ struct symbol {
#define SYMBOL_OPTIONAL 0x0100 /* choice is optional - values can be 'n' */
#define SYMBOL_WRITE 0x0200 /* write symbol to file (KCONFIG_CONFIG) */
#define SYMBOL_CHANGED 0x0400 /* ? */
+#define SYMBOL_WRITTEN 0x0800 /* track info to avoid double-write to .config */
#define SYMBOL_NO_WRITE 0x1000 /* Symbol for internal use only; it will not be written */
#define SYMBOL_CHECKED 0x2000 /* used during dependency checking */
#define SYMBOL_WARNED 0x8000 /* warning has been issued */
diff --git a/scripts/mod/sumversion.c b/scripts/mod/sumversion.c
index 0f6dcb4011a8..63062024ce0e 100644
--- a/scripts/mod/sumversion.c
+++ b/scripts/mod/sumversion.c
@@ -396,34 +396,19 @@ void get_src_version(const char *modname, char sum[], unsigned sumlen)
unsigned long len;
struct md4_ctx md;
char *sources, *end, *fname;
- const char *basename;
char filelist[PATH_MAX + 1];
- char *modverdir = getenv("MODVERDIR");
- if (!modverdir)
- modverdir = ".";
-
- /* Source files for module are in .tmp_versions/modname.mod,
- after the first line. */
- if (strrchr(modname, '/'))
- basename = strrchr(modname, '/') + 1;
- else
- basename = modname;
- snprintf(filelist, sizeof(filelist), "%s/%.*s.mod", modverdir,
- (int) strlen(basename) - 2, basename);
+ /* objects for a module are listed in the first line of *.mod file. */
+ snprintf(filelist, sizeof(filelist), "%.*smod",
+ (int)strlen(modname) - 1, modname);
file = grab_file(filelist, &len);
if (!file)
/* not a module or .mod file missing - ignore */
return;
- sources = strchr(file, '\n');
- if (!sources) {
- warn("malformed versions file for %s\n", modname);
- goto release;
- }
+ sources = file;
- sources++;
end = strchr(sources, '\n');
if (!end) {
warn("bad ending versions file for %s\n", modname);
diff --git a/scripts/modules-check.sh b/scripts/modules-check.sh
index 39e8cb36ba19..f51f446707b8 100755
--- a/scripts/modules-check.sh
+++ b/scripts/modules-check.sh
@@ -9,7 +9,7 @@ check_same_name_modules()
for m in $(sed 's:.*/::' modules.order | sort | uniq -d)
do
echo "warning: same module names found:" >&2
- sed -n "/\/$m/s:^kernel/: :p" modules.order >&2
+ sed -n "/\/$m/s:^: :p" modules.order >&2
done
}
diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index e8ca6dc97e96..c4c580f547ef 100755
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -132,6 +132,11 @@ fi
if [ "$ARCH" != "um" ]; then
$MAKE -f $srctree/Makefile headers
$MAKE -f $srctree/Makefile headers_install INSTALL_HDR_PATH="$libc_headers_dir/usr"
+ # move asm headers to /usr/include/<libc-machine>/asm to match the structure
+ # used by Debian-based distros (to support multi-arch)
+ host_arch=$(dpkg-architecture -a$(cat debian/arch) -qDEB_HOST_MULTIARCH)
+ mkdir $libc_headers_dir/usr/include/$host_arch
+ mv $libc_headers_dir/usr/include/asm $libc_headers_dir/usr/include/$host_arch/
fi
# Install the maintainer scripts
diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian
index 8351584cb24e..e0750b70453f 100755
--- a/scripts/package/mkdebian
+++ b/scripts/package/mkdebian
@@ -197,6 +197,7 @@ Architecture: $debarch
Description: Linux support headers for userspace development
This package provides userspaces headers from the Linux kernel. These headers
are used by the installed headers for GNU glibc and other system libraries.
+Multi-Arch: same
Package: $dbg_packagename
Section: debug
diff --git a/scripts/package/mkspec b/scripts/package/mkspec
index 2d29df4a0a53..8640c278f1aa 100755
--- a/scripts/package/mkspec
+++ b/scripts/package/mkspec
@@ -29,7 +29,7 @@ fi
PROVIDES="$PROVIDES kernel-$KERNELRELEASE"
__KERNELRELEASE=$(echo $KERNELRELEASE | sed -e "s/-/_/g")
-EXCLUDES="$RCS_TAR_IGNORE --exclude=.tmp_versions --exclude=*vmlinux* \
+EXCLUDES="$RCS_TAR_IGNORE --exclude=*vmlinux* --exclude=*.mod \
--exclude=*.o --exclude=*.ko --exclude=*.cmd --exclude=Documentation \
--exclude=.config.old --exclude=.missing-syscalls.d --exclude=*.s"
diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h
index 580e344db3dd..ced3765c4f44 100644
--- a/tools/objtool/arch.h
+++ b/tools/objtool/arch.h
@@ -11,22 +11,24 @@
#include "elf.h"
#include "cfi.h"
-#define INSN_JUMP_CONDITIONAL 1
-#define INSN_JUMP_UNCONDITIONAL 2
-#define INSN_JUMP_DYNAMIC 3
-#define INSN_CALL 4
-#define INSN_CALL_DYNAMIC 5
-#define INSN_RETURN 6
-#define INSN_CONTEXT_SWITCH 7
-#define INSN_STACK 8
-#define INSN_BUG 9
-#define INSN_NOP 10
-#define INSN_STAC 11
-#define INSN_CLAC 12
-#define INSN_STD 13
-#define INSN_CLD 14
-#define INSN_OTHER 15
-#define INSN_LAST INSN_OTHER
+enum insn_type {
+ INSN_JUMP_CONDITIONAL,
+ INSN_JUMP_UNCONDITIONAL,
+ INSN_JUMP_DYNAMIC,
+ INSN_JUMP_DYNAMIC_CONDITIONAL,
+ INSN_CALL,
+ INSN_CALL_DYNAMIC,
+ INSN_RETURN,
+ INSN_CONTEXT_SWITCH,
+ INSN_STACK,
+ INSN_BUG,
+ INSN_NOP,
+ INSN_STAC,
+ INSN_CLAC,
+ INSN_STD,
+ INSN_CLD,
+ INSN_OTHER,
+};
enum op_dest_type {
OP_DEST_REG,
@@ -68,7 +70,7 @@ void arch_initial_func_cfi_state(struct cfi_state *state);
int arch_decode_instruction(struct elf *elf, struct section *sec,
unsigned long offset, unsigned int maxlen,
- unsigned int *len, unsigned char *type,
+ unsigned int *len, enum insn_type *type,
unsigned long *immediate, struct stack_op *op);
bool arch_callee_saved_reg(unsigned char reg);
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 584568f27a83..0567c47a91b1 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -68,7 +68,7 @@ bool arch_callee_saved_reg(unsigned char reg)
int arch_decode_instruction(struct elf *elf, struct section *sec,
unsigned long offset, unsigned int maxlen,
- unsigned int *len, unsigned char *type,
+ unsigned int *len, enum insn_type *type,
unsigned long *immediate, struct stack_op *op)
{
struct insn insn;
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 172f99195726..5f26620f13f5 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -18,6 +18,8 @@
#define FAKE_JUMP_OFFSET -1
+#define C_JUMP_TABLE_SECTION ".rodata..c_jump_table"
+
struct alternative {
struct list_head list;
struct instruction *insn;
@@ -95,6 +97,20 @@ static struct instruction *next_insn_same_func(struct objtool_file *file,
for (insn = next_insn_same_sec(file, insn); insn; \
insn = next_insn_same_sec(file, insn))
+static bool is_sibling_call(struct instruction *insn)
+{
+ /* An indirect jump is either a sibling call or a jump to a table. */
+ if (insn->type == INSN_JUMP_DYNAMIC)
+ return list_empty(&insn->alts);
+
+ if (insn->type != INSN_JUMP_CONDITIONAL &&
+ insn->type != INSN_JUMP_UNCONDITIONAL)
+ return false;
+
+ /* add_jump_destinations() sets insn->call_dest for sibling calls. */
+ return !!insn->call_dest;
+}
+
/*
* This checks to see if the given function is a "noreturn" function.
*
@@ -103,14 +119,9 @@ static struct instruction *next_insn_same_func(struct objtool_file *file,
*
* For local functions, we have to detect them manually by simply looking for
* the lack of a return instruction.
- *
- * Returns:
- * -1: error
- * 0: no dead end
- * 1: dead end
*/
-static int __dead_end_function(struct objtool_file *file, struct symbol *func,
- int recursion)
+static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
+ int recursion)
{
int i;
struct instruction *insn;
@@ -136,30 +147,33 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func,
"rewind_stack_do_exit",
};
+ if (!func)
+ return false;
+
if (func->bind == STB_WEAK)
- return 0;
+ return false;
if (func->bind == STB_GLOBAL)
for (i = 0; i < ARRAY_SIZE(global_noreturns); i++)
if (!strcmp(func->name, global_noreturns[i]))
- return 1;
+ return true;
if (!func->len)
- return 0;
+ return false;
insn = find_insn(file, func->sec, func->offset);
if (!insn->func)
- return 0;
+ return false;
func_for_each_insn_all(file, func, insn) {
empty = false;
if (insn->type == INSN_RETURN)
- return 0;
+ return false;
}
if (empty)
- return 0;
+ return false;
/*
* A function can have a sibling call instead of a return. In that
@@ -167,40 +181,31 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func,
* of the sibling call returns.
*/
func_for_each_insn_all(file, func, insn) {
- if (insn->type == INSN_JUMP_UNCONDITIONAL) {
+ if (is_sibling_call(insn)) {
struct instruction *dest = insn->jump_dest;
if (!dest)
/* sibling call to another file */
- return 0;
-
- if (dest->func && dest->func->pfunc != insn->func->pfunc) {
+ return false;
- /* local sibling call */
- if (recursion == 5) {
- /*
- * Infinite recursion: two functions
- * have sibling calls to each other.
- * This is a very rare case. It means
- * they aren't dead ends.
- */
- return 0;
- }
-
- return __dead_end_function(file, dest->func,
- recursion + 1);
+ /* local sibling call */
+ if (recursion == 5) {
+ /*
+ * Infinite recursion: two functions have
+ * sibling calls to each other. This is a very
+ * rare case. It means they aren't dead ends.
+ */
+ return false;
}
- }
- if (insn->type == INSN_JUMP_DYNAMIC && list_empty(&insn->alts))
- /* sibling call */
- return 0;
+ return __dead_end_function(file, dest->func, recursion+1);
+ }
}
- return 1;
+ return true;
}
-static int dead_end_function(struct objtool_file *file, struct symbol *func)
+static bool dead_end_function(struct objtool_file *file, struct symbol *func)
{
return __dead_end_function(file, func, 0);
}
@@ -262,19 +267,12 @@ static int decode_instructions(struct objtool_file *file)
if (ret)
goto err;
- if (!insn->type || insn->type > INSN_LAST) {
- WARN_FUNC("invalid instruction type %d",
- insn->sec, insn->offset, insn->type);
- ret = -1;
- goto err;
- }
-
hash_add(file->insn_hash, &insn->hash, insn->offset);
list_add_tail(&insn->list, &file->insn_list);
}
list_for_each_entry(func, &sec->symbol_list, list) {
- if (func->type != STT_FUNC)
+ if (func->type != STT_FUNC || func->alias != func)
continue;
if (!find_insn(file, sec, func->offset)) {
@@ -284,8 +282,7 @@ static int decode_instructions(struct objtool_file *file)
}
func_for_each_insn(file, func, insn)
- if (!insn->func)
- insn->func = func;
+ insn->func = func;
}
}
@@ -488,6 +485,7 @@ static const char *uaccess_safe_builtin[] = {
/* misc */
"csum_partial_copy_generic",
"__memcpy_mcsafe",
+ "mcsafe_handle_tail",
"ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */
NULL
};
@@ -505,7 +503,7 @@ static void add_uaccess_safe(struct objtool_file *file)
if (!func)
continue;
- func->alias->uaccess_safe = true;
+ func->uaccess_safe = true;
}
}
@@ -577,13 +575,16 @@ static int add_jump_destinations(struct objtool_file *file)
* Retpoline jumps are really dynamic jumps in
* disguise, so convert them accordingly.
*/
- insn->type = INSN_JUMP_DYNAMIC;
+ if (insn->type == INSN_JUMP_UNCONDITIONAL)
+ insn->type = INSN_JUMP_DYNAMIC;
+ else
+ insn->type = INSN_JUMP_DYNAMIC_CONDITIONAL;
+
insn->retpoline_safe = true;
continue;
} else {
- /* sibling call */
+ /* external sibling call */
insn->call_dest = rela->sym;
- insn->jump_dest = NULL;
continue;
}
@@ -623,7 +624,7 @@ static int add_jump_destinations(struct objtool_file *file)
* However this code can't completely replace the
* read_symbols() code because this doesn't detect the
* case where the parent function's only reference to a
- * subfunction is through a switch table.
+ * subfunction is through a jump table.
*/
if (!strstr(insn->func->name, ".cold.") &&
strstr(insn->jump_dest->func->name, ".cold.")) {
@@ -633,9 +634,8 @@ static int add_jump_destinations(struct objtool_file *file)
} else if (insn->jump_dest->func->pfunc != insn->func->pfunc &&
insn->jump_dest->offset == insn->jump_dest->func->offset) {
- /* sibling class */
+ /* internal sibling call */
insn->call_dest = insn->jump_dest->func;
- insn->jump_dest = NULL;
}
}
}
@@ -896,20 +896,26 @@ out:
return ret;
}
-static int add_switch_table(struct objtool_file *file, struct instruction *insn,
- struct rela *table, struct rela *next_table)
+static int add_jump_table(struct objtool_file *file, struct instruction *insn,
+ struct rela *table)
{
struct rela *rela = table;
- struct instruction *alt_insn;
+ struct instruction *dest_insn;
struct alternative *alt;
struct symbol *pfunc = insn->func->pfunc;
unsigned int prev_offset = 0;
- list_for_each_entry_from(rela, &table->rela_sec->rela_list, list) {
- if (rela == next_table)
+ /*
+ * Each @rela is a switch table relocation which points to the target
+ * instruction.
+ */
+ list_for_each_entry_from(rela, &table->sec->rela_list, list) {
+
+ /* Check for the end of the table: */
+ if (rela != table && rela->jump_table_start)
break;
- /* Make sure the switch table entries are consecutive: */
+ /* Make sure the table entries are consecutive: */
if (prev_offset && rela->offset != prev_offset + 8)
break;
@@ -918,12 +924,12 @@ static int add_switch_table(struct objtool_file *file, struct instruction *insn,
rela->addend == pfunc->offset)
break;
- alt_insn = find_insn(file, rela->sym->sec, rela->addend);
- if (!alt_insn)
+ dest_insn = find_insn(file, rela->sym->sec, rela->addend);
+ if (!dest_insn)
break;
- /* Make sure the jmp dest is in the function or subfunction: */
- if (alt_insn->func->pfunc != pfunc)
+ /* Make sure the destination is in the same function: */
+ if (!dest_insn->func || dest_insn->func->pfunc != pfunc)
break;
alt = malloc(sizeof(*alt));
@@ -932,7 +938,7 @@ static int add_switch_table(struct objtool_file *file, struct instruction *insn,
return -1;
}
- alt->insn = alt_insn;
+ alt->insn = dest_insn;
list_add_tail(&alt->list, &insn->alts);
prev_offset = rela->offset;
}
@@ -947,7 +953,7 @@ static int add_switch_table(struct objtool_file *file, struct instruction *insn,
}
/*
- * find_switch_table() - Given a dynamic jump, find the switch jump table in
+ * find_jump_table() - Given a dynamic jump, find the switch jump table in
* .rodata associated with it.
*
* There are 3 basic patterns:
@@ -989,13 +995,13 @@ static int add_switch_table(struct objtool_file *file, struct instruction *insn,
*
* NOTE: RETPOLINE made it harder still to decode dynamic jumps.
*/
-static struct rela *find_switch_table(struct objtool_file *file,
+static struct rela *find_jump_table(struct objtool_file *file,
struct symbol *func,
struct instruction *insn)
{
- struct rela *text_rela, *rodata_rela;
+ struct rela *text_rela, *table_rela;
struct instruction *orig_insn = insn;
- struct section *rodata_sec;
+ struct section *table_sec;
unsigned long table_offset;
/*
@@ -1028,42 +1034,52 @@ static struct rela *find_switch_table(struct objtool_file *file,
continue;
table_offset = text_rela->addend;
- rodata_sec = text_rela->sym->sec;
+ table_sec = text_rela->sym->sec;
if (text_rela->type == R_X86_64_PC32)
table_offset += 4;
/*
* Make sure the .rodata address isn't associated with a
- * symbol. gcc jump tables are anonymous data.
+ * symbol. GCC jump tables are anonymous data.
+ *
+ * Also support C jump tables which are in the same format as
+ * switch jump tables. For objtool to recognize them, they
+ * need to be placed in the C_JUMP_TABLE_SECTION section. They
+ * have symbols associated with them.
*/
- if (find_symbol_containing(rodata_sec, table_offset))
+ if (find_symbol_containing(table_sec, table_offset) &&
+ strcmp(table_sec->name, C_JUMP_TABLE_SECTION))
continue;
- rodata_rela = find_rela_by_dest(rodata_sec, table_offset);
- if (rodata_rela) {
- /*
- * Use of RIP-relative switch jumps is quite rare, and
- * indicates a rare GCC quirk/bug which can leave dead
- * code behind.
- */
- if (text_rela->type == R_X86_64_PC32)
- file->ignore_unreachables = true;
+ /* Each table entry has a rela associated with it. */
+ table_rela = find_rela_by_dest(table_sec, table_offset);
+ if (!table_rela)
+ continue;
- return rodata_rela;
- }
+ /*
+ * Use of RIP-relative switch jumps is quite rare, and
+ * indicates a rare GCC quirk/bug which can leave dead code
+ * behind.
+ */
+ if (text_rela->type == R_X86_64_PC32)
+ file->ignore_unreachables = true;
+
+ return table_rela;
}
return NULL;
}
-
-static int add_func_switch_tables(struct objtool_file *file,
- struct symbol *func)
+/*
+ * First pass: Mark the head of each jump table so that in the next pass,
+ * we know when a given jump table ends and the next one starts.
+ */
+static void mark_func_jump_tables(struct objtool_file *file,
+ struct symbol *func)
{
- struct instruction *insn, *last = NULL, *prev_jump = NULL;
- struct rela *rela, *prev_rela = NULL;
- int ret;
+ struct instruction *insn, *last = NULL;
+ struct rela *rela;
func_for_each_insn_all(file, func, insn) {
if (!last)
@@ -1071,7 +1087,7 @@ static int add_func_switch_tables(struct objtool_file *file,
/*
* Store back-pointers for unconditional forward jumps such
- * that find_switch_table() can back-track using those and
+ * that find_jump_table() can back-track using those and
* avoid some potentially confusing code.
*/
if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest &&
@@ -1086,27 +1102,25 @@ static int add_func_switch_tables(struct objtool_file *file,
if (insn->type != INSN_JUMP_DYNAMIC)
continue;
- rela = find_switch_table(file, func, insn);
- if (!rela)
- continue;
-
- /*
- * We found a switch table, but we don't know yet how big it
- * is. Don't add it until we reach the end of the function or
- * the beginning of another switch table in the same function.
- */
- if (prev_jump) {
- ret = add_switch_table(file, prev_jump, prev_rela, rela);
- if (ret)
- return ret;
+ rela = find_jump_table(file, func, insn);
+ if (rela) {
+ rela->jump_table_start = true;
+ insn->jump_table = rela;
}
-
- prev_jump = insn;
- prev_rela = rela;
}
+}
+
+static int add_func_jump_tables(struct objtool_file *file,
+ struct symbol *func)
+{
+ struct instruction *insn;
+ int ret;
- if (prev_jump) {
- ret = add_switch_table(file, prev_jump, prev_rela, NULL);
+ func_for_each_insn_all(file, func, insn) {
+ if (!insn->jump_table)
+ continue;
+
+ ret = add_jump_table(file, insn, insn->jump_table);
if (ret)
return ret;
}
@@ -1119,7 +1133,7 @@ static int add_func_switch_tables(struct objtool_file *file,
* section which contains a list of addresses within the function to jump to.
* This finds these jump tables and adds them to the insn->alts lists.
*/
-static int add_switch_table_alts(struct objtool_file *file)
+static int add_jump_table_alts(struct objtool_file *file)
{
struct section *sec;
struct symbol *func;
@@ -1133,7 +1147,8 @@ static int add_switch_table_alts(struct objtool_file *file)
if (func->type != STT_FUNC)
continue;
- ret = add_func_switch_tables(file, func);
+ mark_func_jump_tables(file, func);
+ ret = add_func_jump_tables(file, func);
if (ret)
return ret;
}
@@ -1277,13 +1292,18 @@ static void mark_rodata(struct objtool_file *file)
bool found = false;
/*
- * This searches for the .rodata section or multiple .rodata.func_name
- * sections if -fdata-sections is being used. The .str.1.1 and .str.1.8
- * rodata sections are ignored as they don't contain jump tables.
+ * Search for the following rodata sections, each of which can
+ * potentially contain jump tables:
+ *
+ * - .rodata: can contain GCC switch tables
+ * - .rodata.<func>: same, if -fdata-sections is being used
+ * - .rodata..c_jump_table: contains C annotated jump tables
+ *
+ * .rodata.str1.* sections are ignored; they don't contain jump tables.
*/
for_each_sec(file, sec) {
- if (!strncmp(sec->name, ".rodata", 7) &&
- !strstr(sec->name, ".str1.")) {
+ if ((!strncmp(sec->name, ".rodata", 7) && !strstr(sec->name, ".str1.")) ||
+ !strcmp(sec->name, C_JUMP_TABLE_SECTION)) {
sec->rodata = true;
found = true;
}
@@ -1325,7 +1345,7 @@ static int decode_sections(struct objtool_file *file)
if (ret)
return ret;
- ret = add_switch_table_alts(file);
+ ret = add_jump_table_alts(file);
if (ret)
return ret;
@@ -1873,12 +1893,12 @@ static bool insn_state_match(struct instruction *insn, struct insn_state *state)
static inline bool func_uaccess_safe(struct symbol *func)
{
if (func)
- return func->alias->uaccess_safe;
+ return func->uaccess_safe;
return false;
}
-static inline const char *insn_dest_name(struct instruction *insn)
+static inline const char *call_dest_name(struct instruction *insn)
{
if (insn->call_dest)
return insn->call_dest->name;
@@ -1890,13 +1910,13 @@ static int validate_call(struct instruction *insn, struct insn_state *state)
{
if (state->uaccess && !func_uaccess_safe(insn->call_dest)) {
WARN_FUNC("call to %s() with UACCESS enabled",
- insn->sec, insn->offset, insn_dest_name(insn));
+ insn->sec, insn->offset, call_dest_name(insn));
return 1;
}
if (state->df) {
WARN_FUNC("call to %s() with DF set",
- insn->sec, insn->offset, insn_dest_name(insn));
+ insn->sec, insn->offset, call_dest_name(insn));
return 1;
}
@@ -1920,13 +1940,12 @@ static int validate_sibling_call(struct instruction *insn, struct insn_state *st
* each instruction and validate all the rules described in
* tools/objtool/Documentation/stack-validation.txt.
*/
-static int validate_branch(struct objtool_file *file, struct instruction *first,
- struct insn_state state)
+static int validate_branch(struct objtool_file *file, struct symbol *func,
+ struct instruction *first, struct insn_state state)
{
struct alternative *alt;
struct instruction *insn, *next_insn;
struct section *sec;
- struct symbol *func = NULL;
int ret;
insn = first;
@@ -1947,9 +1966,6 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
return 1;
}
- if (insn->func)
- func = insn->func->pfunc;
-
if (func && insn->ignore) {
WARN_FUNC("BUG: why am I validating an ignored function?",
sec, insn->offset);
@@ -1971,7 +1987,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
i = insn;
save_insn = NULL;
- func_for_each_insn_continue_reverse(file, insn->func, i) {
+ func_for_each_insn_continue_reverse(file, func, i) {
if (i->save) {
save_insn = i;
break;
@@ -2017,7 +2033,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
if (alt->skip_orig)
skip_orig = true;
- ret = validate_branch(file, alt->insn, state);
+ ret = validate_branch(file, func, alt->insn, state);
if (ret) {
if (backtrace)
BT_FUNC("(alt)", insn);
@@ -2055,7 +2071,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
if (state.bp_scratch) {
WARN("%s uses BP as a scratch register",
- insn->func->name);
+ func->name);
return 1;
}
@@ -2067,36 +2083,28 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
if (ret)
return ret;
- if (insn->type == INSN_CALL) {
- if (is_fentry_call(insn))
- break;
-
- ret = dead_end_function(file, insn->call_dest);
- if (ret == 1)
- return 0;
- if (ret == -1)
- return 1;
- }
-
- if (!no_fp && func && !has_valid_stack_frame(&state)) {
+ if (!no_fp && func && !is_fentry_call(insn) &&
+ !has_valid_stack_frame(&state)) {
WARN_FUNC("call without frame pointer save/setup",
sec, insn->offset);
return 1;
}
+
+ if (dead_end_function(file, insn->call_dest))
+ return 0;
+
break;
case INSN_JUMP_CONDITIONAL:
case INSN_JUMP_UNCONDITIONAL:
- if (func && !insn->jump_dest) {
+ if (func && is_sibling_call(insn)) {
ret = validate_sibling_call(insn, &state);
if (ret)
return ret;
- } else if (insn->jump_dest &&
- (!func || !insn->jump_dest->func ||
- insn->jump_dest->func->pfunc == func)) {
- ret = validate_branch(file, insn->jump_dest,
- state);
+ } else if (insn->jump_dest) {
+ ret = validate_branch(file, func,
+ insn->jump_dest, state);
if (ret) {
if (backtrace)
BT_FUNC("(branch)", insn);
@@ -2110,13 +2118,17 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
break;
case INSN_JUMP_DYNAMIC:
- if (func && list_empty(&insn->alts)) {
+ case INSN_JUMP_DYNAMIC_CONDITIONAL:
+ if (func && is_sibling_call(insn)) {
ret = validate_sibling_call(insn, &state);
if (ret)
return ret;
}
- return 0;
+ if (insn->type == INSN_JUMP_DYNAMIC)
+ return 0;
+
+ break;
case INSN_CONTEXT_SWITCH:
if (func && (!next_insn || !next_insn->hint)) {
@@ -2162,7 +2174,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
break;
case INSN_CLAC:
- if (!state.uaccess && insn->func) {
+ if (!state.uaccess && func) {
WARN_FUNC("redundant UACCESS disable", sec, insn->offset);
return 1;
}
@@ -2183,7 +2195,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
break;
case INSN_CLD:
- if (!state.df && insn->func)
+ if (!state.df && func)
WARN_FUNC("redundant CLD", sec, insn->offset);
state.df = false;
@@ -2222,7 +2234,7 @@ static int validate_unwind_hints(struct objtool_file *file)
for_each_insn(file, insn) {
if (insn->hint && !insn->visited) {
- ret = validate_branch(file, insn, state);
+ ret = validate_branch(file, insn->func, insn, state);
if (ret && backtrace)
BT_FUNC("<=== (hint)", insn);
warnings += ret;
@@ -2345,16 +2357,25 @@ static int validate_functions(struct objtool_file *file)
for_each_sec(file, sec) {
list_for_each_entry(func, &sec->symbol_list, list) {
- if (func->type != STT_FUNC || func->pfunc != func)
+ if (func->type != STT_FUNC)
+ continue;
+
+ if (!func->len) {
+ WARN("%s() is missing an ELF size annotation",
+ func->name);
+ warnings++;
+ }
+
+ if (func->pfunc != func || func->alias != func)
continue;
insn = find_insn(file, sec, func->offset);
- if (!insn || insn->ignore)
+ if (!insn || insn->ignore || insn->visited)
continue;
- state.uaccess = func->alias->uaccess_safe;
+ state.uaccess = func->uaccess_safe;
- ret = validate_branch(file, insn, state);
+ ret = validate_branch(file, func, insn, state);
if (ret && backtrace)
BT_FUNC("<=== (func)", insn);
warnings += ret;
@@ -2407,7 +2428,7 @@ int check(const char *_objname, bool orc)
objname = _objname;
- file.elf = elf_open(objname, orc ? O_RDWR : O_RDONLY);
+ file.elf = elf_read(objname, orc ? O_RDWR : O_RDONLY);
if (!file.elf)
return 1;
diff --git a/tools/objtool/check.h b/tools/objtool/check.h
index cb60b9acf5cf..b881fafcf55d 100644
--- a/tools/objtool/check.h
+++ b/tools/objtool/check.h
@@ -31,13 +31,14 @@ struct instruction {
struct section *sec;
unsigned long offset;
unsigned int len;
- unsigned char type;
+ enum insn_type type;
unsigned long immediate;
bool alt_group, visited, dead_end, ignore, hint, save, restore, ignore_alts;
bool retpoline_safe;
struct symbol *call_dest;
struct instruction *jump_dest;
struct instruction *first_jump_src;
+ struct rela *jump_table;
struct list_head alts;
struct symbol *func;
struct stack_op stack_op;
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index e99e1be19ad9..edba4745f25a 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -278,7 +278,7 @@ static int read_symbols(struct elf *elf)
}
if (sym->offset == s->offset) {
- if (sym->len == s->len && alias == sym)
+ if (sym->len && sym->len == s->len && alias == sym)
alias = s;
if (sym->len >= s->len) {
@@ -385,7 +385,7 @@ static int read_relas(struct elf *elf)
rela->offset = rela->rela.r_offset;
symndx = GELF_R_SYM(rela->rela.r_info);
rela->sym = find_symbol_by_index(elf, symndx);
- rela->rela_sec = sec;
+ rela->sec = sec;
if (!rela->sym) {
WARN("can't find rela entry symbol %d for %s",
symndx, sec->name);
@@ -401,7 +401,7 @@ static int read_relas(struct elf *elf)
return 0;
}
-struct elf *elf_open(const char *name, int flags)
+struct elf *elf_read(const char *name, int flags)
{
struct elf *elf;
Elf_Cmd cmd;
@@ -463,7 +463,7 @@ struct section *elf_create_section(struct elf *elf, const char *name,
{
struct section *sec, *shstrtab;
size_t size = entsize * nr;
- struct Elf_Scn *s;
+ Elf_Scn *s;
Elf_Data *data;
sec = malloc(sizeof(*sec));
diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h
index e44ca5d51871..44150204db4d 100644
--- a/tools/objtool/elf.h
+++ b/tools/objtool/elf.h
@@ -57,11 +57,12 @@ struct rela {
struct list_head list;
struct hlist_node hash;
GElf_Rela rela;
- struct section *rela_sec;
+ struct section *sec;
struct symbol *sym;
unsigned int type;
unsigned long offset;
int addend;
+ bool jump_table_start;
};
struct elf {
@@ -74,7 +75,7 @@ struct elf {
};
-struct elf *elf_open(const char *name, int flags);
+struct elf *elf_read(const char *name, int flags);
struct section *find_section_by_name(struct elf *elf, const char *name);
struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset);
struct symbol *find_symbol_by_name(struct elf *elf, const char *name);
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 79367087bd18..8f24865596af 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -2289,6 +2289,12 @@ static int process_switch_event(struct perf_tool *tool,
if (perf_event__process_switch(tool, event, sample, machine) < 0)
return -1;
+ if (scripting_ops && scripting_ops->process_switch)
+ scripting_ops->process_switch(event, sample, machine);
+
+ if (!script->show_switch_events)
+ return 0;
+
thread = machine__findnew_thread(machine, sample->pid,
sample->tid);
if (thread == NULL) {
@@ -2467,7 +2473,7 @@ static int __cmd_script(struct perf_script *script)
script->tool.mmap = process_mmap_event;
script->tool.mmap2 = process_mmap2_event;
}
- if (script->show_switch_events)
+ if (script->show_switch_events || (scripting_ops && scripting_ops->process_switch))
script->tool.context_switch = process_switch_event;
if (script->show_namespace_events)
script->tool.namespaces = process_namespaces_event;
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 1aa2ed096f65..4f0bbffee05f 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -19,6 +19,7 @@
#include <api/fs/tracing_path.h>
#include <bpf/bpf.h>
#include "util/bpf_map.h"
+#include "util/rlimit.h"
#include "builtin.h"
#include "util/cgroup.h"
#include "util/color.h"
@@ -3864,6 +3865,15 @@ int cmd_trace(int argc, const char **argv)
goto out;
}
+ /*
+ * Parsing .perfconfig may entail creating a BPF event, that may need
+ * to create BPF maps, so bump RLIM_MEMLOCK as the default 64K setting
+ * is too small. This affects just this process, not touching the
+ * global setting. If it fails we'll get something in 'perf trace -v'
+ * to help diagnose the problem.
+ */
+ rlimit__bump_memlock();
+
err = perf_config(trace__config, &trace);
if (err)
goto out;
diff --git a/tools/perf/builtin-version.c b/tools/perf/builtin-version.c
index f470144d1a70..bf114ca9ca87 100644
--- a/tools/perf/builtin-version.c
+++ b/tools/perf/builtin-version.c
@@ -19,6 +19,7 @@ static struct version version;
static struct option version_options[] = {
OPT_BOOLEAN(0, "build-options", &version.build_options,
"display the build options"),
+ OPT_END(),
};
static const char * const version_usage[] = {
diff --git a/tools/perf/pmu-events/arch/s390/cf_m8561/basic.json b/tools/perf/pmu-events/arch/s390/cf_m8561/basic.json
new file mode 100644
index 000000000000..17fb5241928b
--- /dev/null
+++ b/tools/perf/pmu-events/arch/s390/cf_m8561/basic.json
@@ -0,0 +1,58 @@
+[
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "0",
+ "EventName": "CPU_CYCLES",
+ "BriefDescription": "CPU Cycles",
+ "PublicDescription": "Cycle Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "1",
+ "EventName": "INSTRUCTIONS",
+ "BriefDescription": "Instructions",
+ "PublicDescription": "Instruction Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "2",
+ "EventName": "L1I_DIR_WRITES",
+ "BriefDescription": "L1I Directory Writes",
+ "PublicDescription": "Level-1 I-Cache Directory Write Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "3",
+ "EventName": "L1I_PENALTY_CYCLES",
+ "BriefDescription": "L1I Penalty Cycles",
+ "PublicDescription": "Level-1 I-Cache Penalty Cycle Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "4",
+ "EventName": "L1D_DIR_WRITES",
+ "BriefDescription": "L1D Directory Writes",
+ "PublicDescription": "Level-1 D-Cache Directory Write Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "5",
+ "EventName": "L1D_PENALTY_CYCLES",
+ "BriefDescription": "L1D Penalty Cycles",
+ "PublicDescription": "Level-1 D-Cache Penalty Cycle Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "32",
+ "EventName": "PROBLEM_STATE_CPU_CYCLES",
+ "BriefDescription": "Problem-State CPU Cycles",
+ "PublicDescription": "Problem-State Cycle Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "33",
+ "EventName": "PROBLEM_STATE_INSTRUCTIONS",
+ "BriefDescription": "Problem-State Instructions",
+ "PublicDescription": "Problem-State Instruction Count"
+ },
+]
diff --git a/tools/perf/pmu-events/arch/s390/cf_m8561/crypto.json b/tools/perf/pmu-events/arch/s390/cf_m8561/crypto.json
new file mode 100644
index 000000000000..db286f19e7b6
--- /dev/null
+++ b/tools/perf/pmu-events/arch/s390/cf_m8561/crypto.json
@@ -0,0 +1,114 @@
+[
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "64",
+ "EventName": "PRNG_FUNCTIONS",
+ "BriefDescription": "PRNG Functions",
+ "PublicDescription": "Total number of the PRNG functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "65",
+ "EventName": "PRNG_CYCLES",
+ "BriefDescription": "PRNG Cycles",
+ "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing PRNG functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "66",
+ "EventName": "PRNG_BLOCKED_FUNCTIONS",
+ "BriefDescription": "PRNG Blocked Functions",
+ "PublicDescription": "Total number of the PRNG functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "67",
+ "EventName": "PRNG_BLOCKED_CYCLES",
+ "BriefDescription": "PRNG Blocked Cycles",
+ "PublicDescription": "Total number of CPU cycles blocked for the PRNG functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "68",
+ "EventName": "SHA_FUNCTIONS",
+ "BriefDescription": "SHA Functions",
+ "PublicDescription": "Total number of SHA functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "69",
+ "EventName": "SHA_CYCLES",
+ "BriefDescription": "SHA Cycles",
+ "PublicDescription": "Total number of CPU cycles when the SHA coprocessor is busy performing the SHA functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "70",
+ "EventName": "SHA_BLOCKED_FUNCTIONS",
+ "BriefDescription": "SHA Blocked Functions",
+ "PublicDescription": "Total number of the SHA functions that are issued by the CPU and are blocked because the SHA coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "71",
+ "EventName": "SHA_BLOCKED_CYCLES",
+ "BriefDescription": "SHA Bloced Cycles",
+ "PublicDescription": "Total number of CPU cycles blocked for the SHA functions issued by the CPU because the SHA coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "72",
+ "EventName": "DEA_FUNCTIONS",
+ "BriefDescription": "DEA Functions",
+ "PublicDescription": "Total number of the DEA functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "73",
+ "EventName": "DEA_CYCLES",
+ "BriefDescription": "DEA Cycles",
+ "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the DEA functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "74",
+ "EventName": "DEA_BLOCKED_FUNCTIONS",
+ "BriefDescription": "DEA Blocked Functions",
+ "PublicDescription": "Total number of the DEA functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "75",
+ "EventName": "DEA_BLOCKED_CYCLES",
+ "BriefDescription": "DEA Blocked Cycles",
+ "PublicDescription": "Total number of CPU cycles blocked for the DEA functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "76",
+ "EventName": "AES_FUNCTIONS",
+ "BriefDescription": "AES Functions",
+ "PublicDescription": "Total number of AES functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "77",
+ "EventName": "AES_CYCLES",
+ "BriefDescription": "AES Cycles",
+ "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the AES functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "78",
+ "EventName": "AES_BLOCKED_FUNCTIONS",
+ "BriefDescription": "AES Blocked Functions",
+ "PublicDescription": "Total number of AES functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "79",
+ "EventName": "AES_BLOCKED_CYCLES",
+ "BriefDescription": "AES Blocked Cycles",
+ "PublicDescription": "Total number of CPU cycles blocked for the AES functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+ },
+]
diff --git a/tools/perf/pmu-events/arch/s390/cf_m8561/crypto6.json b/tools/perf/pmu-events/arch/s390/cf_m8561/crypto6.json
new file mode 100644
index 000000000000..5e36bc2468d0
--- /dev/null
+++ b/tools/perf/pmu-events/arch/s390/cf_m8561/crypto6.json
@@ -0,0 +1,30 @@
+[
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "80",
+ "EventName": "ECC_FUNCTION_COUNT",
+ "BriefDescription": "ECC Function Count",
+ "PublicDescription": "Long ECC function Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "81",
+ "EventName": "ECC_CYCLES_COUNT",
+ "BriefDescription": "ECC Cycles Count",
+ "PublicDescription": "Long ECC Function cycles count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "82",
+ "EventName": "ECC_BLOCKED_FUNCTION_COUNT",
+ "BriefDescription": "Ecc Blocked Function Count",
+ "PublicDescription": "Long ECC blocked function count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "83",
+ "EventName": "ECC_BLOCKED_CYCLES_COUNT",
+ "BriefDescription": "ECC Blocked Cycles Count",
+ "PublicDescription": "Long ECC blocked cycles count"
+ },
+]
diff --git a/tools/perf/pmu-events/arch/s390/cf_m8561/extended.json b/tools/perf/pmu-events/arch/s390/cf_m8561/extended.json
new file mode 100644
index 000000000000..89e070727e1b
--- /dev/null
+++ b/tools/perf/pmu-events/arch/s390/cf_m8561/extended.json
@@ -0,0 +1,373 @@
+[
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "128",
+ "EventName": "L1D_RO_EXCL_WRITES",
+ "BriefDescription": "L1D Read-only Exclusive Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache where the line was originally in a Read-Only state in the cache but has been updated to be in the Exclusive state that allows stores to the cache line"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "129",
+ "EventName": "DTLB2_WRITES",
+ "BriefDescription": "DTLB2 Writes",
+ "PublicDescription": "A translation has been written into The Translation Lookaside Buffer 2 (TLB2) and the request was made by the data cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "130",
+ "EventName": "DTLB2_MISSES",
+ "BriefDescription": "DTLB2 Misses",
+ "PublicDescription": "A TLB2 miss is in progress for a request made by the data cache. Incremented by one for every TLB2 miss in progress for the Level-1 Data cache on this cycle"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "131",
+ "EventName": "DTLB2_HPAGE_WRITES",
+ "BriefDescription": "DTLB2 One-Megabyte Page Writes",
+ "PublicDescription": "A translation entry was written into the Combined Region and Segment Table Entry array in the Level-2 TLB for a one-megabyte page or a Last Host Translation was done"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "132",
+ "EventName": "DTLB2_GPAGE_WRITES",
+ "BriefDescription": "DTLB2 Two-Gigabyte Page Writes",
+ "PublicDescription": "A translation entry for a two-gigabyte page was written into the Level-2 TLB"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "133",
+ "EventName": "L1D_L2D_SOURCED_WRITES",
+ "BriefDescription": "L1D L2D Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from the Level-2 Data cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "134",
+ "EventName": "ITLB2_WRITES",
+ "BriefDescription": "ITLB2 Writes",
+ "PublicDescription": "A translation entry has been written into the Translation Lookaside Buffer 2 (TLB2) and the request was made by the instruction cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "135",
+ "EventName": "ITLB2_MISSES",
+ "BriefDescription": "ITLB2 Misses",
+ "PublicDescription": "A TLB2 miss is in progress for a request made by the instruction cache. Incremented by one for every TLB2 miss in progress for the Level-1 Instruction cache in a cycle"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "136",
+ "EventName": "L1I_L2I_SOURCED_WRITES",
+ "BriefDescription": "L1I L2I Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from the Level-2 Instruction cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "137",
+ "EventName": "TLB2_PTE_WRITES",
+ "BriefDescription": "TLB2 PTE Writes",
+ "PublicDescription": "A translation entry was written into the Page Table Entry array in the Level-2 TLB"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "138",
+ "EventName": "TLB2_CRSTE_WRITES",
+ "BriefDescription": "TLB2 CRSTE Writes",
+ "PublicDescription": "Translation entries were written into the Combined Region and Segment Table Entry array and the Page Table Entry array in the Level-2 TLB"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "139",
+ "EventName": "TLB2_ENGINES_BUSY",
+ "BriefDescription": "TLB2 Engines Busy",
+ "PublicDescription": "The number of Level-2 TLB translation engines busy in a cycle"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "140",
+ "EventName": "TX_C_TEND",
+ "BriefDescription": "Completed TEND instructions in constrained TX mode",
+ "PublicDescription": "A TEND instruction has completed in a constrained transactional-execution mode"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "141",
+ "EventName": "TX_NC_TEND",
+ "BriefDescription": "Completed TEND instructions in non-constrained TX mode",
+ "PublicDescription": "A TEND instruction has completed in a non-constrained transactional-execution mode"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "143",
+ "EventName": "L1C_TLB2_MISSES",
+ "BriefDescription": "L1C TLB2 Misses",
+ "PublicDescription": "Increments by one for any cycle where a level-1 cache or level-2 TLB miss is in progress"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "144",
+ "EventName": "L1D_ONCHIP_L3_SOURCED_WRITES",
+ "BriefDescription": "L1D On-Chip L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Chip Level-3 cache without intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "145",
+ "EventName": "L1D_ONCHIP_MEMORY_SOURCED_WRITES",
+ "BriefDescription": "L1D On-Chip Memory Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Chip memory"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "146",
+ "EventName": "L1D_ONCHIP_L3_SOURCED_WRITES_IV",
+ "BriefDescription": "L1D On-Chip L3 Sourced Writes with Intervention",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Chip Level-3 cache with intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "147",
+ "EventName": "L1D_ONCLUSTER_L3_SOURCED_WRITES",
+ "BriefDescription": "L1D On-Cluster L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Cluster Level-3 cache withountervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "148",
+ "EventName": "L1D_ONCLUSTER_MEMORY_SOURCED_WRITES",
+ "BriefDescription": "L1D On-Cluster Memory Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Cluster memory"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "149",
+ "EventName": "L1D_ONCLUSTER_L3_SOURCED_WRITES_IV",
+ "BriefDescription": "L1D On-Cluster L3 Sourced Writes with Intervention",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Cluster Level-3 cache with intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "150",
+ "EventName": "L1D_OFFCLUSTER_L3_SOURCED_WRITES",
+ "BriefDescription": "L1D Off-Cluster L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache without intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "151",
+ "EventName": "L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES",
+ "BriefDescription": "L1D Off-Cluster Memory Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from Off-Cluster memory"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "152",
+ "EventName": "L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV",
+ "BriefDescription": "L1D Off-Cluster L3 Sourced Writes with Intervention",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache with intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "153",
+ "EventName": "L1D_OFFDRAWER_L3_SOURCED_WRITES",
+ "BriefDescription": "L1D Off-Drawer L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache without intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "154",
+ "EventName": "L1D_OFFDRAWER_MEMORY_SOURCED_WRITES",
+ "BriefDescription": "L1D Off-Drawer Memory Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from Off-Drawer memory"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "155",
+ "EventName": "L1D_OFFDRAWER_L3_SOURCED_WRITES_IV",
+ "BriefDescription": "L1D Off-Drawer L3 Sourced Writes with Intervention",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache with intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "156",
+ "EventName": "L1D_ONDRAWER_L4_SOURCED_WRITES",
+ "BriefDescription": "L1D On-Drawer L4 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Drawer Level-4 cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "157",
+ "EventName": "L1D_OFFDRAWER_L4_SOURCED_WRITES",
+ "BriefDescription": "L1D Off-Drawer L4 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from Off-Drawer Level-4 cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "158",
+ "EventName": "L1D_ONCHIP_L3_SOURCED_WRITES_RO",
+ "BriefDescription": "L1D On-Chip L3 Sourced Writes read-only",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Chip L3 but a read-only invalidate was done to remove other copies of the cache line"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "162",
+ "EventName": "L1I_ONCHIP_L3_SOURCED_WRITES",
+ "BriefDescription": "L1I On-Chip L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache ine was sourced from an On-Chip Level-3 cache without intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "163",
+ "EventName": "L1I_ONCHIP_MEMORY_SOURCED_WRITES",
+ "BriefDescription": "L1I On-Chip Memory Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache ine was sourced from On-Chip memory"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "164",
+ "EventName": "L1I_ONCHIP_L3_SOURCED_WRITES_IV",
+ "BriefDescription": "L1I On-Chip L3 Sourced Writes with Intervention",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache ine was sourced from an On-Chip Level-3 cache with intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "165",
+ "EventName": "L1I_ONCLUSTER_L3_SOURCED_WRITES",
+ "BriefDescription": "L1I On-Cluster L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Cluster Level-3 cache without intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "166",
+ "EventName": "L1I_ONCLUSTER_MEMORY_SOURCED_WRITES",
+ "BriefDescription": "L1I On-Cluster Memory Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Cluster memory"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "167",
+ "EventName": "L1I_ONCLUSTER_L3_SOURCED_WRITES_IV",
+ "BriefDescription": "L1I On-Cluster L3 Sourced Writes with Intervention",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Cluster Level-3 cache with intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "168",
+ "EventName": "L1I_OFFCLUSTER_L3_SOURCED_WRITES",
+ "BriefDescription": "L1I Off-Cluster L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache without intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "169",
+ "EventName": "L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES",
+ "BriefDescription": "L1I Off-Cluster Memory Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from Off-Cluster memory"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "170",
+ "EventName": "L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV",
+ "BriefDescription": "L1I Off-Cluster L3 Sourced Writes with Intervention",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache with intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "171",
+ "EventName": "L1I_OFFDRAWER_L3_SOURCED_WRITES",
+ "BriefDescription": "L1I Off-Drawer L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache without intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "172",
+ "EventName": "L1I_OFFDRAWER_MEMORY_SOURCED_WRITES",
+ "BriefDescription": "L1I Off-Drawer Memory Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from Off-Drawer memory"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "173",
+ "EventName": "L1I_OFFDRAWER_L3_SOURCED_WRITES_IV",
+ "BriefDescription": "L1I Off-Drawer L3 Sourced Writes with Intervention",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache with intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "174",
+ "EventName": "L1I_ONDRAWER_L4_SOURCED_WRITES",
+ "BriefDescription": "L1I On-Drawer L4 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Drawer Level-4 cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "175",
+ "EventName": "L1I_OFFDRAWER_L4_SOURCED_WRITES",
+ "BriefDescription": "L1I Off-Drawer L4 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from Off-Drawer Level-4 cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "224",
+ "EventName": "BCD_DFP_EXECUTION_SLOTS",
+ "BriefDescription": "BCD DFP Execution Slots",
+ "PublicDescription": "Count of floating point execution slots used for finished Binary Coded Decimal to Decimal Floating Point conversions. Instructions: CDZT, CXZT, CZDT, CZXT"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "225",
+ "EventName": "VX_BCD_EXECUTION_SLOTS",
+ "BriefDescription": "VX BCD Execution Slots",
+ "PublicDescription": "Count of floating point execution slots used for finished vector arithmetic Binary Coded Decimal instructions. Instructions: VAP, VSP, VMPVMSP, VDP, VSDP, VRP, VLIP, VSRP, VPSOPVCP, VTP, VPKZ, VUPKZ, VCVB, VCVBG, VCVDVCVDG"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "226",
+ "EventName": "DECIMAL_INSTRUCTIONS",
+ "BriefDescription": "Decimal Instructions",
+ "PublicDescription": "Decimal instructions dispatched. Instructions: CVB, CVD, AP, CP, DP, ED, EDMK, MP, SRP, SP, ZAP"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "232",
+ "EventName": "LAST_HOST_TRANSLATIONS",
+ "BriefDescription": "Last host translation done",
+ "PublicDescription": "Last Host Translation done"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "243",
+ "EventName": "TX_NC_TABORT",
+ "BriefDescription": "Aborted transactions in non-constrained TX mode",
+ "PublicDescription": "A transaction abort has occurred in a non-constrained transactional-execution mode"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "244",
+ "EventName": "TX_C_TABORT_NO_SPECIAL",
+ "BriefDescription": "Aborted transactions in constrained TX mode not using special completion logic",
+ "PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is not using any special logic to allow the transaction to complete"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "245",
+ "EventName": "TX_C_TABORT_SPECIAL",
+ "BriefDescription": "Aborted transactions in constrained TX mode using special completion logic",
+ "PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is using special logic to allow the transaction to complete"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "448",
+ "EventName": "MT_DIAG_CYCLES_ONE_THR_ACTIVE",
+ "BriefDescription": "Cycle count with one thread active",
+ "PublicDescription": "Cycle count with one thread active"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "449",
+ "EventName": "MT_DIAG_CYCLES_TWO_THR_ACTIVE",
+ "BriefDescription": "Cycle count with two threads active",
+ "PublicDescription": "Cycle count with two threads active"
+ },
+]
diff --git a/tools/perf/pmu-events/arch/s390/mapfile.csv b/tools/perf/pmu-events/arch/s390/mapfile.csv
index 78bcf7f8e206..bd3fc577139c 100644
--- a/tools/perf/pmu-events/arch/s390/mapfile.csv
+++ b/tools/perf/pmu-events/arch/s390/mapfile.csv
@@ -4,3 +4,4 @@ Family-model,Version,Filename,EventType
^IBM.282[78].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_zec12,core
^IBM.296[45].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_z13,core
^IBM.390[67].*[13]\.[1-5].[[:xdigit:]]+$,3,cf_z14,core
+^IBM.856[12].*3\.6.[[:xdigit:]]+$,3,cf_m8561,core
diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py
index 92713d93e956..7bd73a904b4e 100644
--- a/tools/perf/scripts/python/export-to-postgresql.py
+++ b/tools/perf/scripts/python/export-to-postgresql.py
@@ -353,7 +353,10 @@ do_query(query, 'CREATE TABLE threads ('
'tid integer)')
do_query(query, 'CREATE TABLE comms ('
'id bigint NOT NULL,'
- 'comm varchar(16))')
+ 'comm varchar(16),'
+ 'c_thread_id bigint,'
+ 'c_time bigint,'
+ 'exec_flag boolean)')
do_query(query, 'CREATE TABLE comm_threads ('
'id bigint NOT NULL,'
'comm_id bigint,'
@@ -479,6 +482,17 @@ do_query(query, 'CREATE TABLE pwrx ('
'last_cstate integer,'
'wake_reason integer)')
+do_query(query, 'CREATE TABLE context_switches ('
+ 'id bigint NOT NULL,'
+ 'machine_id bigint,'
+ 'time bigint,'
+ 'cpu integer,'
+ 'thread_out_id bigint,'
+ 'comm_out_id bigint,'
+ 'thread_in_id bigint,'
+ 'comm_in_id bigint,'
+ 'flags integer)')
+
do_query(query, 'CREATE VIEW machines_view AS '
'SELECT '
'id,'
@@ -692,6 +706,29 @@ do_query(query, 'CREATE VIEW power_events_view AS '
' INNER JOIN selected_events ON selected_events.id = samples.evsel_id'
' ORDER BY samples.id')
+do_query(query, 'CREATE VIEW context_switches_view AS '
+ 'SELECT '
+ 'context_switches.id,'
+ 'context_switches.machine_id,'
+ 'context_switches.time,'
+ 'context_switches.cpu,'
+ 'th_out.pid AS pid_out,'
+ 'th_out.tid AS tid_out,'
+ 'comm_out.comm AS comm_out,'
+ 'th_in.pid AS pid_in,'
+ 'th_in.tid AS tid_in,'
+ 'comm_in.comm AS comm_in,'
+ 'CASE WHEN context_switches.flags = 0 THEN \'in\''
+ ' WHEN context_switches.flags = 1 THEN \'out\''
+ ' WHEN context_switches.flags = 3 THEN \'out preempt\''
+ ' ELSE CAST ( context_switches.flags AS VARCHAR(11) )'
+ 'END AS flags'
+ ' FROM context_switches'
+ ' INNER JOIN threads AS th_out ON th_out.id = context_switches.thread_out_id'
+ ' INNER JOIN threads AS th_in ON th_in.id = context_switches.thread_in_id'
+ ' INNER JOIN comms AS comm_out ON comm_out.id = context_switches.comm_out_id'
+ ' INNER JOIN comms AS comm_in ON comm_in.id = context_switches.comm_in_id')
+
file_header = struct.pack("!11sii", b"PGCOPY\n\377\r\n\0", 0, 0)
file_trailer = b"\377\377"
@@ -756,6 +793,7 @@ mwait_file = open_output_file("mwait_table.bin")
pwre_file = open_output_file("pwre_table.bin")
exstop_file = open_output_file("exstop_table.bin")
pwrx_file = open_output_file("pwrx_table.bin")
+context_switches_file = open_output_file("context_switches_table.bin")
def trace_begin():
printdate("Writing to intermediate files...")
@@ -763,7 +801,7 @@ def trace_begin():
evsel_table(0, "unknown")
machine_table(0, 0, "unknown")
thread_table(0, 0, 0, -1, -1)
- comm_table(0, "unknown")
+ comm_table(0, "unknown", 0, 0, 0)
dso_table(0, 0, "unknown", "unknown", "")
symbol_table(0, 0, 0, 0, 0, "unknown")
sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
@@ -804,6 +842,7 @@ def trace_end():
copy_output_file(pwre_file, "pwre")
copy_output_file(exstop_file, "exstop")
copy_output_file(pwrx_file, "pwrx")
+ copy_output_file(context_switches_file, "context_switches")
printdate("Removing intermediate files...")
remove_output_file(evsel_file)
@@ -825,6 +864,7 @@ def trace_end():
remove_output_file(pwre_file)
remove_output_file(exstop_file)
remove_output_file(pwrx_file)
+ remove_output_file(context_switches_file)
os.rmdir(output_dir_name)
printdate("Adding primary keys")
do_query(query, 'ALTER TABLE selected_events ADD PRIMARY KEY (id)')
@@ -846,11 +886,14 @@ def trace_end():
do_query(query, 'ALTER TABLE pwre ADD PRIMARY KEY (id)')
do_query(query, 'ALTER TABLE exstop ADD PRIMARY KEY (id)')
do_query(query, 'ALTER TABLE pwrx ADD PRIMARY KEY (id)')
+ do_query(query, 'ALTER TABLE context_switches ADD PRIMARY KEY (id)')
printdate("Adding foreign keys")
do_query(query, 'ALTER TABLE threads '
'ADD CONSTRAINT machinefk FOREIGN KEY (machine_id) REFERENCES machines (id),'
'ADD CONSTRAINT processfk FOREIGN KEY (process_id) REFERENCES threads (id)')
+ do_query(query, 'ALTER TABLE comms '
+ 'ADD CONSTRAINT threadfk FOREIGN KEY (c_thread_id) REFERENCES threads (id)')
do_query(query, 'ALTER TABLE comm_threads '
'ADD CONSTRAINT commfk FOREIGN KEY (comm_id) REFERENCES comms (id),'
'ADD CONSTRAINT threadfk FOREIGN KEY (thread_id) REFERENCES threads (id)')
@@ -881,6 +924,8 @@ def trace_end():
'ADD CONSTRAINT parent_call_pathfk FOREIGN KEY (parent_call_path_id) REFERENCES call_paths (id)')
do_query(query, 'CREATE INDEX pcpid_idx ON calls (parent_call_path_id)')
do_query(query, 'CREATE INDEX pid_idx ON calls (parent_id)')
+ do_query(query, 'ALTER TABLE comms ADD has_calls boolean')
+ do_query(query, 'UPDATE comms SET has_calls = TRUE WHERE comms.id IN (SELECT DISTINCT comm_id FROM calls)')
do_query(query, 'ALTER TABLE ptwrite '
'ADD CONSTRAINT idfk FOREIGN KEY (id) REFERENCES samples (id)')
do_query(query, 'ALTER TABLE cbr '
@@ -893,6 +938,12 @@ def trace_end():
'ADD CONSTRAINT idfk FOREIGN KEY (id) REFERENCES samples (id)')
do_query(query, 'ALTER TABLE pwrx '
'ADD CONSTRAINT idfk FOREIGN KEY (id) REFERENCES samples (id)')
+ do_query(query, 'ALTER TABLE context_switches '
+ 'ADD CONSTRAINT machinefk FOREIGN KEY (machine_id) REFERENCES machines (id),'
+ 'ADD CONSTRAINT toutfk FOREIGN KEY (thread_out_id) REFERENCES threads (id),'
+ 'ADD CONSTRAINT tinfk FOREIGN KEY (thread_in_id) REFERENCES threads (id),'
+ 'ADD CONSTRAINT coutfk FOREIGN KEY (comm_out_id) REFERENCES comms (id),'
+ 'ADD CONSTRAINT cinfk FOREIGN KEY (comm_in_id) REFERENCES comms (id)')
printdate("Dropping unused tables")
if is_table_empty("ptwrite"):
@@ -905,6 +956,8 @@ def trace_end():
drop("pwrx")
if is_table_empty("cbr"):
drop("cbr")
+ if is_table_empty("context_switches"):
+ drop("context_switches")
if (unhandled_count):
printdate("Warning: ", unhandled_count, " unhandled events")
@@ -935,11 +988,11 @@ def thread_table(thread_id, machine_id, process_id, pid, tid, *x):
value = struct.pack("!hiqiqiqiiii", 5, 8, thread_id, 8, machine_id, 8, process_id, 4, pid, 4, tid)
thread_file.write(value)
-def comm_table(comm_id, comm_str, *x):
+def comm_table(comm_id, comm_str, thread_id, time, exec_flag, *x):
comm_str = toserverstr(comm_str)
n = len(comm_str)
- fmt = "!hiqi" + str(n) + "s"
- value = struct.pack(fmt, 2, 8, comm_id, n, comm_str)
+ fmt = "!hiqi" + str(n) + "s" + "iqiqiB"
+ value = struct.pack(fmt, 5, 8, comm_id, n, comm_str, 8, thread_id, 8, time, 1, exec_flag)
comm_file.write(value)
def comm_thread_table(comm_thread_id, comm_id, thread_id, *x):
@@ -1051,3 +1104,8 @@ def synth_data(id, config, raw_buf, *x):
pwrx(id, raw_buf)
elif config == 5:
cbr(id, raw_buf)
+
+def context_switch_table(id, machine_id, time, cpu, thread_out_id, comm_out_id, thread_in_id, comm_in_id, flags, *x):
+ fmt = "!hiqiqiqiiiqiqiqiqii"
+ value = struct.pack(fmt, 9, 8, id, 8, machine_id, 8, time, 4, cpu, 8, thread_out_id, 8, comm_out_id, 8, thread_in_id, 8, comm_in_id, 4, flags)
+ context_switches_file.write(value)
diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py
index 021326c46285..8043a7272a56 100644
--- a/tools/perf/scripts/python/export-to-sqlite.py
+++ b/tools/perf/scripts/python/export-to-sqlite.py
@@ -177,7 +177,10 @@ do_query(query, 'CREATE TABLE threads ('
'tid integer)')
do_query(query, 'CREATE TABLE comms ('
'id integer NOT NULL PRIMARY KEY,'
- 'comm varchar(16))')
+ 'comm varchar(16),'
+ 'c_thread_id bigint,'
+ 'c_time bigint,'
+ 'exec_flag boolean)')
do_query(query, 'CREATE TABLE comm_threads ('
'id integer NOT NULL PRIMARY KEY,'
'comm_id bigint,'
@@ -303,6 +306,17 @@ do_query(query, 'CREATE TABLE pwrx ('
'last_cstate integer,'
'wake_reason integer)')
+do_query(query, 'CREATE TABLE context_switches ('
+ 'id integer NOT NULL PRIMARY KEY,'
+ 'machine_id bigint,'
+ 'time bigint,'
+ 'cpu integer,'
+ 'thread_out_id bigint,'
+ 'comm_out_id bigint,'
+ 'thread_in_id bigint,'
+ 'comm_in_id bigint,'
+ 'flags integer)')
+
# printf was added to sqlite in version 3.8.3
sqlite_has_printf = False
try:
@@ -527,6 +541,29 @@ do_query(query, 'CREATE VIEW power_events_view AS '
' INNER JOIN selected_events ON selected_events.id = evsel_id'
' WHERE selected_events.name IN (\'cbr\',\'mwait\',\'exstop\',\'pwre\',\'pwrx\')')
+do_query(query, 'CREATE VIEW context_switches_view AS '
+ 'SELECT '
+ 'context_switches.id,'
+ 'context_switches.machine_id,'
+ 'context_switches.time,'
+ 'context_switches.cpu,'
+ 'th_out.pid AS pid_out,'
+ 'th_out.tid AS tid_out,'
+ 'comm_out.comm AS comm_out,'
+ 'th_in.pid AS pid_in,'
+ 'th_in.tid AS tid_in,'
+ 'comm_in.comm AS comm_in,'
+ 'CASE WHEN context_switches.flags = 0 THEN \'in\''
+ ' WHEN context_switches.flags = 1 THEN \'out\''
+ ' WHEN context_switches.flags = 3 THEN \'out preempt\''
+ ' ELSE context_switches.flags '
+ 'END AS flags'
+ ' FROM context_switches'
+ ' INNER JOIN threads AS th_out ON th_out.id = context_switches.thread_out_id'
+ ' INNER JOIN threads AS th_in ON th_in.id = context_switches.thread_in_id'
+ ' INNER JOIN comms AS comm_out ON comm_out.id = context_switches.comm_out_id'
+ ' INNER JOIN comms AS comm_in ON comm_in.id = context_switches.comm_in_id')
+
do_query(query, 'END TRANSACTION')
evsel_query = QSqlQuery(db)
@@ -536,7 +573,7 @@ machine_query.prepare("INSERT INTO machines VALUES (?, ?, ?)")
thread_query = QSqlQuery(db)
thread_query.prepare("INSERT INTO threads VALUES (?, ?, ?, ?, ?)")
comm_query = QSqlQuery(db)
-comm_query.prepare("INSERT INTO comms VALUES (?, ?)")
+comm_query.prepare("INSERT INTO comms VALUES (?, ?, ?, ?, ?)")
comm_thread_query = QSqlQuery(db)
comm_thread_query.prepare("INSERT INTO comm_threads VALUES (?, ?, ?)")
dso_query = QSqlQuery(db)
@@ -568,6 +605,8 @@ exstop_query = QSqlQuery(db)
exstop_query.prepare("INSERT INTO exstop VALUES (?, ?)")
pwrx_query = QSqlQuery(db)
pwrx_query.prepare("INSERT INTO pwrx VALUES (?, ?, ?, ?)")
+context_switch_query = QSqlQuery(db)
+context_switch_query.prepare("INSERT INTO context_switches VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)")
def trace_begin():
printdate("Writing records...")
@@ -576,7 +615,7 @@ def trace_begin():
evsel_table(0, "unknown")
machine_table(0, 0, "unknown")
thread_table(0, 0, 0, -1, -1)
- comm_table(0, "unknown")
+ comm_table(0, "unknown", 0, 0, 0)
dso_table(0, 0, "unknown", "unknown", "")
symbol_table(0, 0, 0, 0, 0, "unknown")
sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
@@ -603,6 +642,8 @@ def trace_end():
if perf_db_export_calls:
do_query(query, 'CREATE INDEX pcpid_idx ON calls (parent_call_path_id)')
do_query(query, 'CREATE INDEX pid_idx ON calls (parent_id)')
+ do_query(query, 'ALTER TABLE comms ADD has_calls boolean')
+ do_query(query, 'UPDATE comms SET has_calls = 1 WHERE comms.id IN (SELECT DISTINCT comm_id FROM calls)')
printdate("Dropping unused tables")
if is_table_empty("ptwrite"):
@@ -615,6 +656,8 @@ def trace_end():
drop("pwrx")
if is_table_empty("cbr"):
drop("cbr")
+ if is_table_empty("context_switches"):
+ drop("context_switches")
if (unhandled_count):
printdate("Warning: ", unhandled_count, " unhandled events")
@@ -642,7 +685,7 @@ def thread_table(*x):
bind_exec(thread_query, 5, x)
def comm_table(*x):
- bind_exec(comm_query, 2, x)
+ bind_exec(comm_query, 5, x)
def comm_thread_table(*x):
bind_exec(comm_thread_query, 3, x)
@@ -748,3 +791,6 @@ def synth_data(id, config, raw_buf, *x):
pwrx(id, raw_buf)
elif config == 5:
cbr(id, raw_buf)
+
+def context_switch_table(*x):
+ bind_exec(context_switch_query, 9, x)
diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py
index 6e7934f2ac9a..61b3911d91e6 100755
--- a/tools/perf/scripts/python/exported-sql-viewer.py
+++ b/tools/perf/scripts/python/exported-sql-viewer.py
@@ -392,7 +392,7 @@ class FindBar():
self.hbox.addWidget(self.close_button)
self.bar = QWidget()
- self.bar.setLayout(self.hbox);
+ self.bar.setLayout(self.hbox)
self.bar.hide()
def Widget(self):
@@ -470,7 +470,7 @@ class CallGraphLevelItemBase(object):
self.params = params
self.row = row
self.parent_item = parent_item
- self.query_done = False;
+ self.query_done = False
self.child_count = 0
self.child_items = []
if parent_item:
@@ -517,7 +517,7 @@ class CallGraphLevelTwoPlusItemBase(CallGraphLevelItemBase):
self.time = time
def Select(self):
- self.query_done = True;
+ self.query_done = True
query = QSqlQuery(self.glb.db)
if self.params.have_ipc:
ipc_str = ", SUM(insn_count), SUM(cyc_count)"
@@ -604,7 +604,7 @@ class CallGraphLevelOneItem(CallGraphLevelItemBase):
self.dbid = comm_id
def Select(self):
- self.query_done = True;
+ self.query_done = True
query = QSqlQuery(self.glb.db)
QueryExec(query, "SELECT thread_id, pid, tid"
" FROM comm_threads"
@@ -622,9 +622,12 @@ class CallGraphRootItem(CallGraphLevelItemBase):
def __init__(self, glb, params):
super(CallGraphRootItem, self).__init__(glb, params, 0, None)
self.dbid = 0
- self.query_done = True;
+ self.query_done = True
+ if_has_calls = ""
+ if IsSelectable(glb.db, "comms", columns = "has_calls"):
+ if_has_calls = " WHERE has_calls = TRUE"
query = QSqlQuery(glb.db)
- QueryExec(query, "SELECT id, comm FROM comms")
+ QueryExec(query, "SELECT id, comm FROM comms" + if_has_calls)
while query.next():
if not query.value(0):
continue
@@ -793,7 +796,7 @@ class CallTreeLevelTwoPlusItemBase(CallGraphLevelItemBase):
self.time = time
def Select(self):
- self.query_done = True;
+ self.query_done = True
if self.calls_id == 0:
comm_thread = " AND comm_id = " + str(self.comm_id) + " AND thread_id = " + str(self.thread_id)
else:
@@ -881,7 +884,7 @@ class CallTreeLevelOneItem(CallGraphLevelItemBase):
self.dbid = comm_id
def Select(self):
- self.query_done = True;
+ self.query_done = True
query = QSqlQuery(self.glb.db)
QueryExec(query, "SELECT thread_id, pid, tid"
" FROM comm_threads"
@@ -899,9 +902,12 @@ class CallTreeRootItem(CallGraphLevelItemBase):
def __init__(self, glb, params):
super(CallTreeRootItem, self).__init__(glb, params, 0, None)
self.dbid = 0
- self.query_done = True;
+ self.query_done = True
+ if_has_calls = ""
+ if IsSelectable(glb.db, "comms", columns = "has_calls"):
+ if_has_calls = " WHERE has_calls = TRUE"
query = QSqlQuery(glb.db)
- QueryExec(query, "SELECT id, comm FROM comms")
+ QueryExec(query, "SELECT id, comm FROM comms" + if_has_calls)
while query.next():
if not query.value(0):
continue
@@ -971,7 +977,7 @@ class VBox():
def __init__(self, w1, w2, w3=None):
self.vbox = QWidget()
- self.vbox.setLayout(QVBoxLayout());
+ self.vbox.setLayout(QVBoxLayout())
self.vbox.layout().setContentsMargins(0, 0, 0, 0)
@@ -1391,7 +1397,7 @@ class FetchMoreRecordsBar():
self.hbox.addWidget(self.close_button)
self.bar = QWidget()
- self.bar.setLayout(self.hbox);
+ self.bar.setLayout(self.hbox)
self.bar.show()
self.in_progress = False
@@ -2206,7 +2212,7 @@ class ReportDialogBase(QDialog):
self.vbox.addLayout(self.grid)
self.vbox.addLayout(self.hbox)
- self.setLayout(self.vbox);
+ self.setLayout(self.vbox)
def Ok(self):
vars = self.report_vars
@@ -3139,7 +3145,7 @@ class AboutDialog(QDialog):
self.vbox = QVBoxLayout()
self.vbox.addWidget(self.text)
- self.setLayout(self.vbox);
+ self.setLayout(self.vbox)
# Font resize
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 66a82badc1d1..c3bec9d2c201 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -21,6 +21,7 @@
#include <subcmd/parse-options.h>
#include "string2.h"
#include "symbol.h"
+#include "util/rlimit.h"
#include <linux/kernel.h>
#include <linux/string.h>
#include <subcmd/exec-cmd.h>
@@ -727,6 +728,11 @@ int cmd_test(int argc, const char **argv)
if (skip != NULL)
skiplist = intlist__new(skip);
+ /*
+ * Tests that create BPF maps, for instance, need more than the 64K
+ * default:
+ */
+ rlimit__bump_memlock();
return __cmd_test(argc, argv, skiplist);
}
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index d7e3b008a613..14f812bb07a7 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -20,6 +20,7 @@ perf-y += parse-events.o
perf-y += perf_regs.o
perf-y += path.o
perf-y += print_binary.o
+perf-y += rlimit.o
perf-y += argv_split.o
perf-y += rbtree.o
perf-y += libstring.o
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 67b88b599a53..3d1c34fc4d68 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -2460,7 +2460,7 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
/* Something went wrong, no need to continue */
if (!inode) {
- err = PTR_ERR(inode);
+ err = -ENOMEM;
goto err_free_metadata;
}
@@ -2517,8 +2517,10 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
session->auxtrace = &etm->auxtrace;
etm->unknown_thread = thread__new(999999999, 999999999);
- if (!etm->unknown_thread)
+ if (!etm->unknown_thread) {
+ err = -ENOMEM;
goto err_free_queues;
+ }
/*
* Initialize list node so that at thread__zput() we can avoid
@@ -2530,8 +2532,10 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
if (err)
goto err_delete_thread;
- if (thread__init_map_groups(etm->unknown_thread, etm->machine))
+ if (thread__init_map_groups(etm->unknown_thread, etm->machine)) {
+ err = -ENOMEM;
goto err_delete_thread;
+ }
if (dump_trace) {
cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu);
@@ -2575,5 +2579,5 @@ err_free_traceid_list:
err_free_hdr:
zfree(&hdr);
- return -EINVAL;
+ return err;
}
diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c
index 2394c7506abe..ffbb3e7d3288 100644
--- a/tools/perf/util/db-export.c
+++ b/tools/perf/util/db-export.c
@@ -20,70 +20,14 @@
#include "db-export.h"
#include <linux/zalloc.h>
-struct deferred_export {
- struct list_head node;
- struct comm *comm;
-};
-
-static int db_export__deferred(struct db_export *dbe)
-{
- struct deferred_export *de;
- int err;
-
- while (!list_empty(&dbe->deferred)) {
- de = list_entry(dbe->deferred.next, struct deferred_export,
- node);
- err = dbe->export_comm(dbe, de->comm);
- list_del_init(&de->node);
- free(de);
- if (err)
- return err;
- }
-
- return 0;
-}
-
-static void db_export__free_deferred(struct db_export *dbe)
-{
- struct deferred_export *de;
-
- while (!list_empty(&dbe->deferred)) {
- de = list_entry(dbe->deferred.next, struct deferred_export,
- node);
- list_del_init(&de->node);
- free(de);
- }
-}
-
-static int db_export__defer_comm(struct db_export *dbe, struct comm *comm)
-{
- struct deferred_export *de;
-
- de = zalloc(sizeof(struct deferred_export));
- if (!de)
- return -ENOMEM;
-
- de->comm = comm;
- list_add_tail(&de->node, &dbe->deferred);
-
- return 0;
-}
-
int db_export__init(struct db_export *dbe)
{
memset(dbe, 0, sizeof(struct db_export));
- INIT_LIST_HEAD(&dbe->deferred);
return 0;
}
-int db_export__flush(struct db_export *dbe)
-{
- return db_export__deferred(dbe);
-}
-
void db_export__exit(struct db_export *dbe)
{
- db_export__free_deferred(dbe);
call_return_processor__free(dbe->crp);
dbe->crp = NULL;
}
@@ -115,71 +59,73 @@ int db_export__machine(struct db_export *dbe, struct machine *machine)
}
int db_export__thread(struct db_export *dbe, struct thread *thread,
- struct machine *machine, struct comm *comm)
+ struct machine *machine, struct thread *main_thread)
{
- struct thread *main_thread;
u64 main_thread_db_id = 0;
- int err;
if (thread->db_id)
return 0;
thread->db_id = ++dbe->thread_last_db_id;
- if (thread->pid_ != -1) {
- if (thread->pid_ == thread->tid) {
- main_thread = thread;
- } else {
- main_thread = machine__findnew_thread(machine,
- thread->pid_,
- thread->pid_);
- if (!main_thread)
- return -ENOMEM;
- err = db_export__thread(dbe, main_thread, machine,
- comm);
- if (err)
- goto out_put;
- if (comm) {
- err = db_export__comm_thread(dbe, comm, thread);
- if (err)
- goto out_put;
- }
- }
+ if (main_thread)
main_thread_db_id = main_thread->db_id;
- if (main_thread != thread)
- thread__put(main_thread);
- }
if (dbe->export_thread)
return dbe->export_thread(dbe, thread, main_thread_db_id,
machine);
return 0;
+}
-out_put:
- thread__put(main_thread);
- return err;
+static int __db_export__comm(struct db_export *dbe, struct comm *comm,
+ struct thread *thread)
+{
+ comm->db_id = ++dbe->comm_last_db_id;
+
+ if (dbe->export_comm)
+ return dbe->export_comm(dbe, comm, thread);
+
+ return 0;
}
int db_export__comm(struct db_export *dbe, struct comm *comm,
- struct thread *main_thread)
+ struct thread *thread)
+{
+ if (comm->db_id)
+ return 0;
+
+ return __db_export__comm(dbe, comm, thread);
+}
+
+/*
+ * Export the "exec" comm. The "exec" comm is the program / application command
+ * name at the time it first executes. It is used to group threads for the same
+ * program. Note that the main thread pid (or thread group id tgid) cannot be
+ * used because it does not change when a new program is exec'ed.
+ */
+int db_export__exec_comm(struct db_export *dbe, struct comm *comm,
+ struct thread *main_thread)
{
int err;
if (comm->db_id)
return 0;
- comm->db_id = ++dbe->comm_last_db_id;
-
- if (dbe->export_comm) {
- if (main_thread->comm_set)
- err = dbe->export_comm(dbe, comm);
- else
- err = db_export__defer_comm(dbe, comm);
- if (err)
- return err;
- }
+ err = __db_export__comm(dbe, comm, main_thread);
+ if (err)
+ return err;
+ /*
+ * Record the main thread for this comm. Note that the main thread can
+ * have many "exec" comms because there will be a new one every time it
+ * exec's. An "exec" comm however will only ever have 1 main thread.
+ * That is different to any other threads for that same program because
+ * exec() will effectively kill them, so the relationship between the
+ * "exec" comm and non-main threads is 1-to-1. That is why
+ * db_export__comm_thread() is called here for the main thread, but it
+ * is called for non-main threads when they are exported.
+ */
return db_export__comm_thread(dbe, comm, main_thread);
}
@@ -340,11 +286,65 @@ int db_export__branch_type(struct db_export *dbe, u32 branch_type,
return 0;
}
+static int db_export__threads(struct db_export *dbe, struct thread *thread,
+ struct thread *main_thread,
+ struct machine *machine, struct comm **comm_ptr)
+{
+ struct comm *comm = NULL;
+ struct comm *curr_comm;
+ int err;
+
+ if (main_thread) {
+ /*
+ * A thread has a reference to the main thread, so export the
+ * main thread first.
+ */
+ err = db_export__thread(dbe, main_thread, machine, main_thread);
+ if (err)
+ return err;
+ /*
+ * Export comm before exporting the non-main thread because
+ * db_export__comm_thread() can be called further below.
+ */
+ comm = machine__thread_exec_comm(machine, main_thread);
+ if (comm) {
+ err = db_export__exec_comm(dbe, comm, main_thread);
+ if (err)
+ return err;
+ *comm_ptr = comm;
+ }
+ }
+
+ if (thread != main_thread) {
+ /*
+ * For a non-main thread, db_export__comm_thread() must be
+ * called only if thread has not previously been exported.
+ */
+ bool export_comm_thread = comm && !thread->db_id;
+
+ err = db_export__thread(dbe, thread, machine, main_thread);
+ if (err)
+ return err;
+
+ if (export_comm_thread) {
+ err = db_export__comm_thread(dbe, comm, thread);
+ if (err)
+ return err;
+ }
+ }
+
+ curr_comm = thread__comm(thread);
+ if (curr_comm)
+ return db_export__comm(dbe, curr_comm, thread);
+
+ return 0;
+}
+
int db_export__sample(struct db_export *dbe, union perf_event *event,
struct perf_sample *sample, struct perf_evsel *evsel,
struct addr_location *al)
{
- struct thread* thread = al->thread;
+ struct thread *thread = al->thread;
struct export_sample es = {
.event = event,
.sample = sample,
@@ -364,19 +364,13 @@ int db_export__sample(struct db_export *dbe, union perf_event *event,
return err;
main_thread = thread__main_thread(al->machine, thread);
- if (main_thread)
- comm = machine__thread_exec_comm(al->machine, main_thread);
- err = db_export__thread(dbe, thread, al->machine, comm);
+ err = db_export__threads(dbe, thread, main_thread, al->machine, &comm);
if (err)
goto out_put;
- if (comm) {
- err = db_export__comm(dbe, comm, main_thread);
- if (err)
- goto out_put;
+ if (comm)
es.comm_db_id = comm->db_id;
- }
es.db_id = ++dbe->sample_last_db_id;
@@ -525,3 +519,92 @@ int db_export__call_return(struct db_export *dbe, struct call_return *cr,
return 0;
}
+
+static int db_export__pid_tid(struct db_export *dbe, struct machine *machine,
+ pid_t pid, pid_t tid, u64 *db_id,
+ struct comm **comm_ptr, bool *is_idle)
+{
+ struct thread *thread = machine__find_thread(machine, pid, tid);
+ struct thread *main_thread;
+ int err = 0;
+
+ if (!thread || !thread->comm_set)
+ goto out_put;
+
+ *is_idle = !thread->pid_ && !thread->tid;
+
+ main_thread = thread__main_thread(machine, thread);
+
+ err = db_export__threads(dbe, thread, main_thread, machine, comm_ptr);
+
+ *db_id = thread->db_id;
+
+ thread__put(main_thread);
+out_put:
+ thread__put(thread);
+
+ return err;
+}
+
+int db_export__switch(struct db_export *dbe, union perf_event *event,
+ struct perf_sample *sample, struct machine *machine)
+{
+ bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
+ bool out_preempt = out &&
+ (event->header.misc & PERF_RECORD_MISC_SWITCH_OUT_PREEMPT);
+ int flags = out | (out_preempt << 1);
+ bool is_idle_a = false, is_idle_b = false;
+ u64 th_a_id = 0, th_b_id = 0;
+ u64 comm_out_id, comm_in_id;
+ struct comm *comm_a = NULL;
+ struct comm *comm_b = NULL;
+ u64 th_out_id, th_in_id;
+ u64 db_id;
+ int err;
+
+ err = db_export__machine(dbe, machine);
+ if (err)
+ return err;
+
+ err = db_export__pid_tid(dbe, machine, sample->pid, sample->tid,
+ &th_a_id, &comm_a, &is_idle_a);
+ if (err)
+ return err;
+
+ if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) {
+ pid_t pid = event->context_switch.next_prev_pid;
+ pid_t tid = event->context_switch.next_prev_tid;
+
+ err = db_export__pid_tid(dbe, machine, pid, tid, &th_b_id,
+ &comm_b, &is_idle_b);
+ if (err)
+ return err;
+ }
+
+ /*
+ * Do not export if both threads are unknown (i.e. not being traced),
+ * or one is unknown and the other is the idle task.
+ */
+ if ((!th_a_id || is_idle_a) && (!th_b_id || is_idle_b))
+ return 0;
+
+ db_id = ++dbe->context_switch_last_db_id;
+
+ if (out) {
+ th_out_id = th_a_id;
+ th_in_id = th_b_id;
+ comm_out_id = comm_a ? comm_a->db_id : 0;
+ comm_in_id = comm_b ? comm_b->db_id : 0;
+ } else {
+ th_out_id = th_b_id;
+ th_in_id = th_a_id;
+ comm_out_id = comm_b ? comm_b->db_id : 0;
+ comm_in_id = comm_a ? comm_a->db_id : 0;
+ }
+
+ if (dbe->export_context_switch)
+ return dbe->export_context_switch(dbe, db_id, machine, sample,
+ th_out_id, comm_out_id,
+ th_in_id, comm_in_id, flags);
+ return 0;
+}
diff --git a/tools/perf/util/db-export.h b/tools/perf/util/db-export.h
index e8a64028a386..ba1f62a5fe10 100644
--- a/tools/perf/util/db-export.h
+++ b/tools/perf/util/db-export.h
@@ -43,7 +43,8 @@ struct db_export {
int (*export_machine)(struct db_export *dbe, struct machine *machine);
int (*export_thread)(struct db_export *dbe, struct thread *thread,
u64 main_thread_db_id, struct machine *machine);
- int (*export_comm)(struct db_export *dbe, struct comm *comm);
+ int (*export_comm)(struct db_export *dbe, struct comm *comm,
+ struct thread *thread);
int (*export_comm_thread)(struct db_export *dbe, u64 db_id,
struct comm *comm, struct thread *thread);
int (*export_dso)(struct db_export *dbe, struct dso *dso,
@@ -56,6 +57,11 @@ struct db_export {
int (*export_call_path)(struct db_export *dbe, struct call_path *cp);
int (*export_call_return)(struct db_export *dbe,
struct call_return *cr);
+ int (*export_context_switch)(struct db_export *dbe, u64 db_id,
+ struct machine *machine,
+ struct perf_sample *sample,
+ u64 th_out_id, u64 comm_out_id,
+ u64 th_in_id, u64 comm_in_id, int flags);
struct call_return_processor *crp;
struct call_path_root *cpr;
u64 evsel_last_db_id;
@@ -68,18 +74,19 @@ struct db_export {
u64 sample_last_db_id;
u64 call_path_last_db_id;
u64 call_return_last_db_id;
- struct list_head deferred;
+ u64 context_switch_last_db_id;
};
int db_export__init(struct db_export *dbe);
-int db_export__flush(struct db_export *dbe);
void db_export__exit(struct db_export *dbe);
int db_export__evsel(struct db_export *dbe, struct perf_evsel *evsel);
int db_export__machine(struct db_export *dbe, struct machine *machine);
int db_export__thread(struct db_export *dbe, struct thread *thread,
- struct machine *machine, struct comm *comm);
+ struct machine *machine, struct thread *main_thread);
int db_export__comm(struct db_export *dbe, struct comm *comm,
- struct thread *main_thread);
+ struct thread *thread);
+int db_export__exec_comm(struct db_export *dbe, struct comm *comm,
+ struct thread *main_thread);
int db_export__comm_thread(struct db_export *dbe, struct comm *comm,
struct thread *thread);
int db_export__dso(struct db_export *dbe, struct dso *dso,
@@ -97,5 +104,7 @@ int db_export__branch_types(struct db_export *dbe);
int db_export__call_path(struct db_export *dbe, struct call_path *cp);
int db_export__call_return(struct db_export *dbe, struct call_return *cr,
u64 *parent_db_id);
+int db_export__switch(struct db_export *dbe, union perf_event *event,
+ struct perf_sample *sample, struct machine *machine);
#endif
diff --git a/tools/perf/util/rlimit.c b/tools/perf/util/rlimit.c
new file mode 100644
index 000000000000..13521d392a22
--- /dev/null
+++ b/tools/perf/util/rlimit.c
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+
+#include "util/debug.h"
+#include "util/rlimit.h"
+#include <sys/time.h>
+#include <sys/resource.h>
+
+/*
+ * Bump the memlock so that we can get bpf maps of a reasonable size,
+ * like the ones used with 'perf trace' and with 'perf test bpf',
+ * improve this to some specific request if needed.
+ */
+void rlimit__bump_memlock(void)
+{
+ struct rlimit rlim;
+
+ if (getrlimit(RLIMIT_MEMLOCK, &rlim) == 0) {
+ rlim.rlim_cur *= 4;
+ rlim.rlim_max *= 4;
+
+ if (setrlimit(RLIMIT_MEMLOCK, &rlim) < 0) {
+ rlim.rlim_cur /= 2;
+ rlim.rlim_max /= 2;
+
+ if (setrlimit(RLIMIT_MEMLOCK, &rlim) < 0)
+ pr_debug("Couldn't bump rlimit(MEMLOCK), failures may take place when creating BPF maps, etc\n");
+ }
+ }
+}
diff --git a/tools/perf/util/rlimit.h b/tools/perf/util/rlimit.h
new file mode 100644
index 000000000000..9f59d8e710a3
--- /dev/null
+++ b/tools/perf/util/rlimit.h
@@ -0,0 +1,6 @@
+#ifndef __PERF_RLIMIT_H_
+#define __PERF_RLIMIT_H_
+/* SPDX-License-Identifier: LGPL-2.1 */
+
+void rlimit__bump_memlock(void);
+#endif // __PERF_RLIMIT_H_
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 112bed65232f..25dc1d765553 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -113,6 +113,7 @@ struct tables {
PyObject *call_path_handler;
PyObject *call_return_handler;
PyObject *synth_handler;
+ PyObject *context_switch_handler;
bool db_export_mode;
};
@@ -1011,15 +1012,19 @@ static int python_export_thread(struct db_export *dbe, struct thread *thread,
return 0;
}
-static int python_export_comm(struct db_export *dbe, struct comm *comm)
+static int python_export_comm(struct db_export *dbe, struct comm *comm,
+ struct thread *thread)
{
struct tables *tables = container_of(dbe, struct tables, dbe);
PyObject *t;
- t = tuple_new(2);
+ t = tuple_new(5);
tuple_set_u64(t, 0, comm->db_id);
tuple_set_string(t, 1, comm__str(comm));
+ tuple_set_u64(t, 2, thread->db_id);
+ tuple_set_u64(t, 3, comm->start);
+ tuple_set_s32(t, 4, comm->exec);
call_object(tables->comm_handler, t, "comm_table");
@@ -1233,6 +1238,34 @@ static int python_export_call_return(struct db_export *dbe,
return 0;
}
+static int python_export_context_switch(struct db_export *dbe, u64 db_id,
+ struct machine *machine,
+ struct perf_sample *sample,
+ u64 th_out_id, u64 comm_out_id,
+ u64 th_in_id, u64 comm_in_id, int flags)
+{
+ struct tables *tables = container_of(dbe, struct tables, dbe);
+ PyObject *t;
+
+ t = tuple_new(9);
+
+ tuple_set_u64(t, 0, db_id);
+ tuple_set_u64(t, 1, machine->db_id);
+ tuple_set_u64(t, 2, sample->time);
+ tuple_set_s32(t, 3, sample->cpu);
+ tuple_set_u64(t, 4, th_out_id);
+ tuple_set_u64(t, 5, comm_out_id);
+ tuple_set_u64(t, 6, th_in_id);
+ tuple_set_u64(t, 7, comm_in_id);
+ tuple_set_s32(t, 8, flags);
+
+ call_object(tables->context_switch_handler, t, "context_switch");
+
+ Py_DECREF(t);
+
+ return 0;
+}
+
static int python_process_call_return(struct call_return *cr, u64 *parent_db_id,
void *data)
{
@@ -1296,6 +1329,16 @@ static void python_process_event(union perf_event *event,
}
}
+static void python_process_switch(union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct tables *tables = &tables_global;
+
+ if (tables->db_export_mode)
+ db_export__switch(&tables->dbe, event, sample, machine);
+}
+
static void get_handler_name(char *str, size_t size,
struct perf_evsel *evsel)
{
@@ -1511,6 +1554,7 @@ static void set_table_handlers(struct tables *tables)
SET_TABLE_HANDLER(sample);
SET_TABLE_HANDLER(call_path);
SET_TABLE_HANDLER(call_return);
+ SET_TABLE_HANDLER(context_switch);
/*
* Synthesized events are samples but with architecture-specific data
@@ -1620,9 +1664,7 @@ error:
static int python_flush_script(void)
{
- struct tables *tables = &tables_global;
-
- return db_export__flush(&tables->dbe);
+ return 0;
}
/*
@@ -1831,6 +1873,7 @@ struct scripting_ops python_scripting_ops = {
.flush_script = python_flush_script,
.stop_script = python_stop_script,
.process_event = python_process_event,
+ .process_switch = python_process_switch,
.process_stat = python_process_stat,
.process_stat_interval = python_process_stat_interval,
.generate_script = python_generate_script,
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index d9b0a942090a..c7002fe11673 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -81,6 +81,9 @@ struct scripting_ops {
struct perf_sample *sample,
struct perf_evsel *evsel,
struct addr_location *al);
+ void (*process_switch)(union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
void (*process_stat)(struct perf_stat_config *config,
struct perf_evsel *evsel, u64 tstamp);
void (*process_stat_interval)(u64 tstamp);
diff --git a/tools/power/cpupower/debug/kernel/Makefile b/tools/power/cpupower/debug/kernel/Makefile
index c23e5a6ceb7e..7b5c43684be1 100644
--- a/tools/power/cpupower/debug/kernel/Makefile
+++ b/tools/power/cpupower/debug/kernel/Makefile
@@ -12,8 +12,8 @@ default:
$(MAKE) -C $(KDIR) M=$(CURDIR)
clean:
- - rm -rf *.o *.ko .tmp-versions .*.cmd .*.mod.* *.mod.c
- - rm -rf .tmp_versions* Module.symvers modules.order
+ - rm -rf *.o *.ko .*.cmd .*.mod.* *.mod.c
+ - rm -rf Module.symvers modules.order
install: default
install -d $(KMISC)
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 62afd0b43074..ba7849751989 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -10,11 +10,11 @@ UNAME_M := $(shell uname -m)
LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/ucall.c lib/sparsebit.c
LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c
LIBKVM_aarch64 = lib/aarch64/processor.c
+LIBKVM_s390x = lib/s390x/processor.c
TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
-TEST_GEN_PROGS_x86_64 += x86_64/kvm_create_max_vcpus
TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
@@ -26,9 +26,14 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
TEST_GEN_PROGS_x86_64 += clear_dirty_log_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
+TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
TEST_GEN_PROGS_aarch64 += clear_dirty_log_test
TEST_GEN_PROGS_aarch64 += dirty_log_test
+TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
+
+TEST_GEN_PROGS_s390x += s390x/sync_regs_test
+TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
LIBKVM += $(LIBKVM_$(UNAME_M))
@@ -43,7 +48,12 @@ CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \
$(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie)
-LDFLAGS += -pthread $(no-pie-option)
+# On s390, build the testcases KVM-enabled
+pgste-option = $(call try-run, echo 'int main() { return 0; }' | \
+ $(CC) -Werror -Wl$(comma)--s390-pgste -x c - -o "$$TMP",-Wl$(comma)--s390-pgste)
+
+
+LDFLAGS += -pthread $(no-pie-option) $(pgste-option)
# After inclusion, $(OUTPUT) is defined and
# $(TEST_GEN_PROGS) starts with $(OUTPUT)/
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 00235f5932f0..e0e66b115ef2 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -41,6 +41,12 @@ enum vm_guest_mode {
NUM_VM_MODES,
};
+#ifdef __aarch64__
+#define VM_MODE_DEFAULT VM_MODE_P40V48_4K
+#else
+#define VM_MODE_DEFAULT VM_MODE_P52V48_4K
+#endif
+
#define vm_guest_mode_string(m) vm_guest_mode_string[m]
extern const char * const vm_guest_mode_string[];
@@ -111,10 +117,12 @@ void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_sregs *sregs);
int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_sregs *sregs);
+#ifdef __KVM_HAVE_VCPU_EVENTS
void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_vcpu_events *events);
void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_vcpu_events *events);
+#endif
#ifdef __x86_64__
void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_nested_state *state);
diff --git a/tools/testing/selftests/kvm/include/s390x/processor.h b/tools/testing/selftests/kvm/include/s390x/processor.h
new file mode 100644
index 000000000000..e0e96a5f608c
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/s390x/processor.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * s390x processor specific defines
+ */
+#ifndef SELFTEST_KVM_PROCESSOR_H
+#define SELFTEST_KVM_PROCESSOR_H
+
+/* Bits in the region/segment table entry */
+#define REGION_ENTRY_ORIGIN ~0xfffUL /* region/segment table origin */
+#define REGION_ENTRY_PROTECT 0x200 /* region protection bit */
+#define REGION_ENTRY_NOEXEC 0x100 /* region no-execute bit */
+#define REGION_ENTRY_OFFSET 0xc0 /* region table offset */
+#define REGION_ENTRY_INVALID 0x20 /* invalid region table entry */
+#define REGION_ENTRY_TYPE 0x0c /* region/segment table type mask */
+#define REGION_ENTRY_LENGTH 0x03 /* region third length */
+
+/* Bits in the page table entry */
+#define PAGE_INVALID 0x400 /* HW invalid bit */
+#define PAGE_PROTECT 0x200 /* HW read-only bit */
+#define PAGE_NOEXEC 0x100 /* HW no-execute bit */
+
+#endif
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
index 429226bc6a92..231d79e57774 100644
--- a/tools/testing/selftests/kvm/x86_64/kvm_create_max_vcpus.c
+++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
@@ -27,7 +27,7 @@ void test_vcpu_creation(int first_vcpu_id, int num_vcpus)
printf("Testing creating %d vCPUs, with IDs %d...%d.\n",
num_vcpus, first_vcpu_id, first_vcpu_id + num_vcpus - 1);
- vm = vm_create(VM_MODE_P52V48_4K, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+ vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
for (i = 0; i < num_vcpus; i++) {
int vcpu_id = first_vcpu_id + i;
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index af2023d818a5..486400a97374 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -227,7 +227,7 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
uint64_t extra_pg_pages = (extra_mem_pages / ptrs_per_4k_pte) * 2;
struct kvm_vm *vm;
- vm = vm_create(VM_MODE_P40V48_4K, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
+ vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
vm_vcpu_add_default(vm, vcpuid, guest_code);
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 221e3fa46680..6e49bb039376 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -556,6 +556,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
int ret;
struct userspace_mem_region *region;
size_t huge_page_size = KVM_UTIL_PGS_PER_HUGEPG * vm->page_size;
+ size_t alignment;
TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical "
"address not on a page boundary.\n"
@@ -605,9 +606,20 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
TEST_ASSERT(region != NULL, "Insufficient Memory");
region->mmap_size = npages * vm->page_size;
- /* Enough memory to align up to a huge page. */
+#ifdef __s390x__
+ /* On s390x, the host address must be aligned to 1M (due to PGSTEs) */
+ alignment = 0x100000;
+#else
+ alignment = 1;
+#endif
+
if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
- region->mmap_size += huge_page_size;
+ alignment = max(huge_page_size, alignment);
+
+ /* Add enough memory to align up if necessary */
+ if (alignment > 1)
+ region->mmap_size += alignment;
+
region->mmap_start = mmap(NULL, region->mmap_size,
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS
@@ -617,9 +629,8 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
"test_malloc failed, mmap_start: %p errno: %i",
region->mmap_start, errno);
- /* Align THP allocation up to start of a huge page. */
- region->host_mem = align(region->mmap_start,
- src_type == VM_MEM_SRC_ANONYMOUS_THP ? huge_page_size : 1);
+ /* Align host address */
+ region->host_mem = align(region->mmap_start, alignment);
/* As needed perform madvise */
if (src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) {
@@ -1218,6 +1229,7 @@ void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
ret, errno);
}
+#ifdef __KVM_HAVE_VCPU_EVENTS
void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_vcpu_events *events)
{
@@ -1243,6 +1255,7 @@ void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS, failed, rc: %i errno: %i",
ret, errno);
}
+#endif
#ifdef __x86_64__
void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c
new file mode 100644
index 000000000000..32a02360b1eb
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/s390x/processor.c
@@ -0,0 +1,278 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KVM selftest s390x library code - CPU-related functions (page tables...)
+ *
+ * Copyright (C) 2019, Red Hat, Inc.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include "processor.h"
+#include "kvm_util.h"
+#include "../kvm_util_internal.h"
+
+#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
+
+#define PAGES_PER_REGION 4
+
+void virt_pgd_alloc(struct kvm_vm *vm, uint32_t memslot)
+{
+ vm_paddr_t paddr;
+
+ TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x",
+ vm->page_size);
+
+ if (vm->pgd_created)
+ return;
+
+ paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, memslot);
+ memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size);
+
+ vm->pgd = paddr;
+ vm->pgd_created = true;
+}
+
+/*
+ * Allocate 4 pages for a region/segment table (ri < 4), or one page for
+ * a page table (ri == 4). Returns a suitable region/segment table entry
+ * which points to the freshly allocated pages.
+ */
+static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri, uint32_t memslot)
+{
+ uint64_t taddr;
+
+ taddr = vm_phy_pages_alloc(vm, ri < 4 ? PAGES_PER_REGION : 1,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, memslot);
+ memset(addr_gpa2hva(vm, taddr), 0xff, PAGES_PER_REGION * vm->page_size);
+
+ return (taddr & REGION_ENTRY_ORIGIN)
+ | (((4 - ri) << 2) & REGION_ENTRY_TYPE)
+ | ((ri < 4 ? (PAGES_PER_REGION - 1) : 0) & REGION_ENTRY_LENGTH);
+}
+
+/*
+ * VM Virtual Page Map
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * gva - VM Virtual Address
+ * gpa - VM Physical Address
+ * memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Within the VM given by vm, creates a virtual translation for the page
+ * starting at vaddr to the page starting at paddr.
+ */
+void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa,
+ uint32_t memslot)
+{
+ int ri, idx;
+ uint64_t *entry;
+
+ TEST_ASSERT((gva % vm->page_size) == 0,
+ "Virtual address not on page boundary,\n"
+ " vaddr: 0x%lx vm->page_size: 0x%x",
+ gva, vm->page_size);
+ TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+ (gva >> vm->page_shift)),
+ "Invalid virtual address, vaddr: 0x%lx",
+ gva);
+ TEST_ASSERT((gpa % vm->page_size) == 0,
+ "Physical address not on page boundary,\n"
+ " paddr: 0x%lx vm->page_size: 0x%x",
+ gva, vm->page_size);
+ TEST_ASSERT((gpa >> vm->page_shift) <= vm->max_gfn,
+ "Physical address beyond beyond maximum supported,\n"
+ " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ gva, vm->max_gfn, vm->page_size);
+
+ /* Walk through region and segment tables */
+ entry = addr_gpa2hva(vm, vm->pgd);
+ for (ri = 1; ri <= 4; ri++) {
+ idx = (gva >> (64 - 11 * ri)) & 0x7ffu;
+ if (entry[idx] & REGION_ENTRY_INVALID)
+ entry[idx] = virt_alloc_region(vm, ri, memslot);
+ entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN);
+ }
+
+ /* Fill in page table entry */
+ idx = (gva >> 12) & 0x0ffu; /* page index */
+ if (!(entry[idx] & PAGE_INVALID))
+ fprintf(stderr,
+ "WARNING: PTE for gpa=0x%"PRIx64" already set!\n", gpa);
+ entry[idx] = gpa;
+}
+
+/*
+ * Address Guest Virtual to Guest Physical
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * gpa - VM virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Equivalent VM physical address
+ *
+ * Translates the VM virtual address given by gva to a VM physical
+ * address and then locates the memory region containing the VM
+ * physical address, within the VM given by vm. When found, the host
+ * virtual address providing the memory to the vm physical address is
+ * returned.
+ * A TEST_ASSERT failure occurs if no region containing translated
+ * VM virtual address exists.
+ */
+vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ int ri, idx;
+ uint64_t *entry;
+
+ TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x",
+ vm->page_size);
+
+ entry = addr_gpa2hva(vm, vm->pgd);
+ for (ri = 1; ri <= 4; ri++) {
+ idx = (gva >> (64 - 11 * ri)) & 0x7ffu;
+ TEST_ASSERT(!(entry[idx] & REGION_ENTRY_INVALID),
+ "No region mapping for vm virtual address 0x%lx",
+ gva);
+ entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN);
+ }
+
+ idx = (gva >> 12) & 0x0ffu; /* page index */
+
+ TEST_ASSERT(!(entry[idx] & PAGE_INVALID),
+ "No page mapping for vm virtual address 0x%lx", gva);
+
+ return (entry[idx] & ~0xffful) + (gva & 0xffful);
+}
+
+static void virt_dump_ptes(FILE *stream, struct kvm_vm *vm, uint8_t indent,
+ uint64_t ptea_start)
+{
+ uint64_t *pte, ptea;
+
+ for (ptea = ptea_start; ptea < ptea_start + 0x100 * 8; ptea += 8) {
+ pte = addr_gpa2hva(vm, ptea);
+ if (*pte & PAGE_INVALID)
+ continue;
+ fprintf(stream, "%*spte @ 0x%lx: 0x%016lx\n",
+ indent, "", ptea, *pte);
+ }
+}
+
+static void virt_dump_region(FILE *stream, struct kvm_vm *vm, uint8_t indent,
+ uint64_t reg_tab_addr)
+{
+ uint64_t addr, *entry;
+
+ for (addr = reg_tab_addr; addr < reg_tab_addr + 0x400 * 8; addr += 8) {
+ entry = addr_gpa2hva(vm, addr);
+ if (*entry & REGION_ENTRY_INVALID)
+ continue;
+ fprintf(stream, "%*srt%lde @ 0x%lx: 0x%016lx\n",
+ indent, "", 4 - ((*entry & REGION_ENTRY_TYPE) >> 2),
+ addr, *entry);
+ if (*entry & REGION_ENTRY_TYPE) {
+ virt_dump_region(stream, vm, indent + 2,
+ *entry & REGION_ENTRY_ORIGIN);
+ } else {
+ virt_dump_ptes(stream, vm, indent + 2,
+ *entry & REGION_ENTRY_ORIGIN);
+ }
+ }
+}
+
+void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+ if (!vm->pgd_created)
+ return;
+
+ virt_dump_region(stream, vm, indent, vm->pgd);
+}
+
+/*
+ * Create a VM with reasonable defaults
+ *
+ * Input Args:
+ * vcpuid - The id of the single VCPU to add to the VM.
+ * extra_mem_pages - The size of extra memories to add (this will
+ * decide how much extra space we will need to
+ * setup the page tables using mem slot 0)
+ * guest_code - The vCPU's entry point
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to opaque structure that describes the created VM.
+ */
+struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
+ void *guest_code)
+{
+ /*
+ * The additional amount of pages required for the page tables is:
+ * 1 * n / 256 + 4 * (n / 256) / 2048 + 4 * (n / 256) / 2048^2 + ...
+ * which is definitely smaller than (n / 256) * 2.
+ */
+ uint64_t extra_pg_pages = extra_mem_pages / 256 * 2;
+ struct kvm_vm *vm;
+
+ vm = vm_create(VM_MODE_DEFAULT,
+ DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
+
+ kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+ vm_vcpu_add_default(vm, vcpuid, guest_code);
+
+ return vm;
+}
+
+/*
+ * Adds a vCPU with reasonable defaults (i.e. a stack and initial PSW)
+ *
+ * Input Args:
+ * vcpuid - The id of the VCPU to add to the VM.
+ * guest_code - The vCPU's entry point
+ */
+void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
+{
+ size_t stack_size = DEFAULT_STACK_PGS * getpagesize();
+ uint64_t stack_vaddr;
+ struct kvm_regs regs;
+ struct kvm_sregs sregs;
+ struct kvm_run *run;
+
+ TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x",
+ vm->page_size);
+
+ stack_vaddr = vm_vaddr_alloc(vm, stack_size,
+ DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);
+
+ vm_vcpu_add(vm, vcpuid);
+
+ /* Setup guest registers */
+ vcpu_regs_get(vm, vcpuid, &regs);
+ regs.gprs[15] = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize()) - 160;
+ vcpu_regs_set(vm, vcpuid, &regs);
+
+ vcpu_sregs_get(vm, vcpuid, &sregs);
+ sregs.crs[0] |= 0x00040000; /* Enable floating point regs */
+ sregs.crs[1] = vm->pgd | 0xf; /* Primary region table */
+ vcpu_sregs_set(vm, vcpuid, &sregs);
+
+ run = vcpu_state(vm, vcpuid);
+ run->psw_mask = 0x0400000180000000ULL; /* DAT enabled + 64 bit mode */
+ run->psw_addr = (uintptr_t)guest_code;
+}
+
+void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
+{
+ struct vcpu *vcpu = vm->vcpu_head;
+
+ fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n",
+ indent, "", vcpu->state->psw_mask, vcpu->state->psw_addr);
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index b430f962e323..6cb34a0fa200 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -821,7 +821,7 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;
/* Create VM */
- vm = vm_create(VM_MODE_P52V48_4K,
+ vm = vm_create(VM_MODE_DEFAULT,
DEFAULT_GUEST_PHY_PAGES + extra_pg_pages,
O_RDWR);
diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
index fe56d159d65f..204f847bd065 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -5,8 +5,6 @@
* Copyright (C) 2018, Google LLC.
*/
-#define _GNU_SOURCE /* for program_invocation_name */
-
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c
new file mode 100644
index 000000000000..e85ff0d69548
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390x/sync_regs_test.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test for s390x KVM_CAP_SYNC_REGS
+ *
+ * Based on the same test for x86:
+ * Copyright (C) 2018, Google LLC.
+ *
+ * Adaptions for s390x:
+ * Copyright (C) 2019, Red Hat, Inc.
+ *
+ * Test expected behavior of the KVM_CAP_SYNC_REGS functionality.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+
+#define VCPU_ID 5
+
+static void guest_code(void)
+{
+ for (;;) {
+ asm volatile ("diag 0,0,0x501");
+ asm volatile ("ahi 11,1");
+ }
+}
+
+#define REG_COMPARE(reg) \
+ TEST_ASSERT(left->reg == right->reg, \
+ "Register " #reg \
+ " values did not match: 0x%llx, 0x%llx\n", \
+ left->reg, right->reg)
+
+static void compare_regs(struct kvm_regs *left, struct kvm_sync_regs *right)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ REG_COMPARE(gprs[i]);
+}
+
+static void compare_sregs(struct kvm_sregs *left, struct kvm_sync_regs *right)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ REG_COMPARE(acrs[i]);
+
+ for (i = 0; i < 16; i++)
+ REG_COMPARE(crs[i]);
+}
+
+#undef REG_COMPARE
+
+#define TEST_SYNC_FIELDS (KVM_SYNC_GPRS|KVM_SYNC_ACRS|KVM_SYNC_CRS)
+#define INVALID_SYNC_FIELD 0x80000000
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ struct kvm_run *run;
+ struct kvm_regs regs;
+ struct kvm_sregs sregs;
+ int rv, cap;
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
+ if (!cap) {
+ fprintf(stderr, "CAP_SYNC_REGS not supported, skipping test\n");
+ exit(KSFT_SKIP);
+ }
+
+ /* Create VM */
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+ run = vcpu_state(vm, VCPU_ID);
+
+ /* Request and verify all valid register sets. */
+ run->kvm_valid_regs = TEST_SYNC_FIELDS;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
+ "Unexpected exit reason: %u (%s)\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+ TEST_ASSERT(run->s390_sieic.icptcode == 4 &&
+ (run->s390_sieic.ipa >> 8) == 0x83 &&
+ (run->s390_sieic.ipb >> 16) == 0x501,
+ "Unexpected interception code: ic=%u, ipa=0x%x, ipb=0x%x\n",
+ run->s390_sieic.icptcode, run->s390_sieic.ipa,
+ run->s390_sieic.ipb);
+
+ vcpu_regs_get(vm, VCPU_ID, &regs);
+ compare_regs(&regs, &run->s.regs);
+
+ vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ compare_sregs(&sregs, &run->s.regs);
+
+ /* Set and verify various register values */
+ run->s.regs.gprs[11] = 0xBAD1DEA;
+ run->s.regs.acrs[0] = 1 << 11;
+
+ run->kvm_valid_regs = TEST_SYNC_FIELDS;
+ run->kvm_dirty_regs = KVM_SYNC_GPRS | KVM_SYNC_ACRS;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
+ "Unexpected exit reason: %u (%s)\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+ TEST_ASSERT(run->s.regs.gprs[11] == 0xBAD1DEA + 1,
+ "r11 sync regs value incorrect 0x%llx.",
+ run->s.regs.gprs[11]);
+ TEST_ASSERT(run->s.regs.acrs[0] == 1 << 11,
+ "acr0 sync regs value incorrect 0x%llx.",
+ run->s.regs.acrs[0]);
+
+ vcpu_regs_get(vm, VCPU_ID, &regs);
+ compare_regs(&regs, &run->s.regs);
+
+ vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ compare_sregs(&sregs, &run->s.regs);
+
+ /* Clear kvm_dirty_regs bits, verify new s.regs values are
+ * overwritten with existing guest values.
+ */
+ run->kvm_valid_regs = TEST_SYNC_FIELDS;
+ run->kvm_dirty_regs = 0;
+ run->s.regs.gprs[11] = 0xDEADBEEF;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
+ "Unexpected exit reason: %u (%s)\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+ TEST_ASSERT(run->s.regs.gprs[11] != 0xDEADBEEF,
+ "r11 sync regs value incorrect 0x%llx.",
+ run->s.regs.gprs[11]);
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh
index 8a20e03d4cb7..9c60337317c6 100755
--- a/tools/testing/selftests/ntb/ntb_test.sh
+++ b/tools/testing/selftests/ntb/ntb_test.sh
@@ -78,10 +78,10 @@ set -e
function _modprobe()
{
- modprobe "$@"
+ modprobe "$@" || return 1
if [[ "$REMOTE_HOST" != "" ]]; then
- ssh "$REMOTE_HOST" modprobe "$@"
+ ssh "$REMOTE_HOST" modprobe "$@" || return 1
fi
}
@@ -442,6 +442,30 @@ function pingpong_test()
echo " Passed"
}
+function msi_test()
+{
+ LOC=$1
+ REM=$2
+
+ write_file 1 $LOC/ready
+
+ echo "Running MSI interrupt tests on: $(subdirname $LOC) / $(subdirname $REM)"
+
+ CNT=$(read_file "$LOC/count")
+ for ((i = 0; i < $CNT; i++)); do
+ START=$(read_file $REM/../irq${i}_occurrences)
+ write_file $i $LOC/trigger
+ END=$(read_file $REM/../irq${i}_occurrences)
+
+ if [[ $(($END - $START)) != 1 ]]; then
+ echo "MSI did not trigger the interrupt on the remote side!" >&2
+ exit 1
+ fi
+ done
+
+ echo " Passed"
+}
+
function perf_test()
{
USE_DMA=$1
@@ -520,6 +544,29 @@ function ntb_pingpong_tests()
_modprobe -r ntb_pingpong
}
+function ntb_msi_tests()
+{
+ LOCAL_MSI="$DEBUGFS/ntb_msi_test/$LOCAL_DEV"
+ REMOTE_MSI="$REMOTE_HOST:$DEBUGFS/ntb_msi_test/$REMOTE_DEV"
+
+ echo "Starting ntb_msi_test tests..."
+
+ if ! _modprobe ntb_msi_test 2> /dev/null; then
+ echo " Not doing MSI tests seeing the module is not available."
+ return
+ fi
+
+ port_test $LOCAL_MSI $REMOTE_MSI
+
+ LOCAL_PEER="$LOCAL_MSI/peer$LOCAL_PIDX"
+ REMOTE_PEER="$REMOTE_MSI/peer$REMOTE_PIDX"
+
+ msi_test $LOCAL_PEER $REMOTE_PEER
+ msi_test $REMOTE_PEER $LOCAL_PEER
+
+ _modprobe -r ntb_msi_test
+}
+
function ntb_perf_tests()
{
LOCAL_PERF="$DEBUGFS/ntb_perf/$LOCAL_DEV"
@@ -541,6 +588,7 @@ function cleanup()
_modprobe -r ntb_perf 2> /dev/null
_modprobe -r ntb_pingpong 2> /dev/null
_modprobe -r ntb_transport 2> /dev/null
+ _modprobe -r ntb_msi_test 2> /dev/null
set -e
}
@@ -577,5 +625,7 @@ ntb_tool_tests
echo
ntb_pingpong_tests
echo
+ntb_msi_tests
+echo
ntb_perf_tests
echo
diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
index 5ab4c60c100e..15a329da59fa 100644
--- a/tools/testing/selftests/x86/fsgsbase.c
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -489,25 +489,11 @@ static void test_ptrace_write_gsbase(void)
* selector value is changed or not by the GSBASE write in
* a ptracer.
*/
- if (gs != *shared_scratch) {
- nerrs++;
- printf("[FAIL]\tGS changed to %lx\n", gs);
-
- /*
- * On older kernels, poking a nonzero value into the
- * base would zero the selector. On newer kernels,
- * this behavior has changed -- poking the base
- * changes only the base and, if FSGSBASE is not
- * available, this may have no effect.
- */
- if (gs == 0)
- printf("\tNote: this is expected behavior on older kernels.\n");
- } else if (have_fsgsbase && (base != 0xFF)) {
- nerrs++;
- printf("[FAIL]\tGSBASE changed to %lx\n", base);
+ if (gs == 0 && base == 0xFF) {
+ printf("[OK]\tGS was reset as expected\n");
} else {
- printf("[OK]\tGS remained 0x%hx%s", *shared_scratch, have_fsgsbase ? " and GSBASE changed to 0xFF" : "");
- printf("\n");
+ nerrs++;
+ printf("[FAIL]\tGS=0x%lx, GSBASE=0x%lx (should be 0, 0xFF)\n", gs, base);
}
}
diff --git a/usr/include/Makefile b/usr/include/Makefile
index cd8daa20d487..aa316d99e035 100644
--- a/usr/include/Makefile
+++ b/usr/include/Makefile
@@ -30,8 +30,6 @@ header-test-$(CONFIG_CPU_BIG_ENDIAN) += linux/byteorder/big_endian.h
header-test-$(CONFIG_CPU_LITTLE_ENDIAN) += linux/byteorder/little_endian.h
header-test- += linux/coda.h
header-test- += linux/coda_psdev.h
-header-test- += linux/dvb/audio.h
-header-test- += linux/dvb/osd.h
header-test- += linux/elfcore.h
header-test- += linux/errqueue.h
header-test- += linux/fsmap.h
@@ -44,7 +42,6 @@ header-test- += linux/netfilter_bridge/ebtables.h
header-test- += linux/netfilter_ipv4/ipt_LOG.h
header-test- += linux/netfilter_ipv6/ip6t_LOG.h
header-test- += linux/nfc.h
-header-test- += linux/nilfs2_ondisk.h
header-test- += linux/omap3isp.h
header-test- += linux/omapfb.h
header-test- += linux/patchkey.h
@@ -59,9 +56,6 @@ header-test- += linux/v4l2-mediabus.h
header-test- += linux/v4l2-subdev.h
header-test- += linux/videodev2.h
header-test- += linux/vm_sockets.h
-header-test- += misc/ocxl.h
-header-test- += mtd/mtd-abi.h
-header-test- += mtd/mtd-user.h
header-test- += scsi/scsi_bsg_fc.h
header-test- += scsi/scsi_netlink.h
header-test- += scsi/scsi_netlink_fc.h
@@ -108,7 +102,6 @@ header-test- += linux/bpf_perf_event.h
endif
ifeq ($(SRCARCH),s390)
-header-test- += asm/runtime_instr.h
header-test- += asm/zcrypt.h
endif
@@ -116,7 +109,6 @@ ifeq ($(SRCARCH),sparc)
header-test- += asm/stat.h
header-test- += asm/uctx.h
header-test- += asm/fbio.h
-header-test- += asm/openpromio.h
endif
# asm-generic/*.h is used by asm/*.h, and should not be included directly
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b4ab59dd6846..887f3b0c2b60 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -314,6 +314,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
kvm_vcpu_set_in_spin_loop(vcpu, false);
kvm_vcpu_set_dy_eligible(vcpu, false);
vcpu->preempted = false;
+ vcpu->ready = false;
r = kvm_arch_vcpu_init(vcpu);
if (r < 0)
@@ -2387,6 +2388,7 @@ bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
wqp = kvm_arch_vcpu_wq(vcpu);
if (swq_has_sleeper(wqp)) {
swake_up_one(wqp);
+ WRITE_ONCE(vcpu->ready, true);
++vcpu->stat.halt_wakeup;
return true;
}
@@ -2500,7 +2502,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
continue;
} else if (pass && i > last_boosted_vcpu)
break;
- if (!READ_ONCE(vcpu->preempted))
+ if (!READ_ONCE(vcpu->ready))
continue;
if (vcpu == me)
continue;
@@ -4203,8 +4205,8 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
{
struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
- if (vcpu->preempted)
- vcpu->preempted = false;
+ vcpu->preempted = false;
+ WRITE_ONCE(vcpu->ready, false);
kvm_arch_sched_in(vcpu, cpu);
@@ -4216,8 +4218,10 @@ static void kvm_sched_out(struct preempt_notifier *pn,
{
struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
- if (current->state == TASK_RUNNING)
+ if (current->state == TASK_RUNNING) {
vcpu->preempted = true;
+ WRITE_ONCE(vcpu->ready, true);
+ }
kvm_arch_vcpu_put(vcpu);
}