aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
-rw-r--r--.mailmap1
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/imagis,ist3038c.yaml1
-rw-r--r--Documentation/rust/quick-start.rst2
-rw-r--r--Documentation/rust/testing.rst2
-rw-r--r--Documentation/virt/kvm/api.rst727
-rw-r--r--Documentation/virt/kvm/locking.rst4
-rw-r--r--MAINTAINERS42
-rw-r--r--Makefile2
-rw-r--r--arch/arm64/include/asm/kvm_host.h2
-rw-r--r--arch/arm64/include/asm/tlbflush.h22
-rw-r--r--arch/arm64/mm/mmu.c5
-rw-r--r--arch/loongarch/include/asm/kvm_host.h7
-rw-r--r--arch/loongarch/kernel/asm-offsets.c1
-rw-r--r--arch/loongarch/kvm/Kconfig1
-rw-r--r--arch/loongarch/kvm/Makefile2
-rw-r--r--arch/loongarch/kvm/main.c3
-rw-r--r--arch/loongarch/kvm/switch.S12
-rw-r--r--arch/loongarch/kvm/vcpu.c37
-rw-r--r--arch/mips/include/asm/kvm_host.h1
-rw-r--r--arch/powerpc/include/asm/kvm_host.h1
-rw-r--r--arch/riscv/Kconfig.socs1
-rw-r--r--arch/riscv/include/asm/kvm_host.h2
-rw-r--r--arch/riscv/kvm/main.c4
-rw-r--r--arch/riscv/kvm/vcpu_onereg.c2
-rw-r--r--arch/riscv/kvm/vcpu_pmu.c1
-rw-r--r--arch/s390/include/asm/gmap.h1
-rw-r--r--arch/s390/include/asm/kvm_host.h1
-rw-r--r--arch/s390/include/asm/uv.h2
-rw-r--r--arch/s390/kernel/uv.c136
-rw-r--r--arch/s390/kvm/gmap.c103
-rw-r--r--arch/s390/kvm/intercept.c2
-rw-r--r--arch/s390/kvm/interrupt.c8
-rw-r--r--arch/s390/kvm/kvm-s390.c35
-rw-r--r--arch/s390/kvm/trace-s390.h4
-rw-r--r--arch/s390/mm/gmap.c28
-rw-r--r--arch/x86/hyperv/hv_vtl.c1
-rw-r--r--arch/x86/hyperv/ivm.c3
-rw-r--r--arch/x86/include/asm/cpufeatures.h1
-rw-r--r--arch/x86/include/asm/kvm_host.h28
-rw-r--r--arch/x86/include/asm/sev-common.h12
-rw-r--r--arch/x86/include/asm/svm.h5
-rw-r--r--arch/x86/include/asm/vmx.h28
-rw-r--r--arch/x86/include/uapi/asm/kvm.h3
-rw-r--r--arch/x86/include/uapi/asm/svm.h2
-rw-r--r--arch/x86/kernel/cpu/microcode/amd.c2
-rw-r--r--arch/x86/kernel/cpu/sgx/driver.c10
-rw-r--r--arch/x86/kernel/cpu/vmware.c4
-rw-r--r--arch/x86/kvm/Kconfig1
-rw-r--r--arch/x86/kvm/cpuid.c78
-rw-r--r--arch/x86/kvm/cpuid.h9
-rw-r--r--arch/x86/kvm/emulate.c5
-rw-r--r--arch/x86/kvm/i8259.c2
-rw-r--r--arch/x86/kvm/kvm_emulate.h7
-rw-r--r--arch/x86/kvm/lapic.c17
-rw-r--r--arch/x86/kvm/mmu/mmu.c365
-rw-r--r--arch/x86/kvm/mmu/paging_tmpl.h3
-rw-r--r--arch/x86/kvm/mmu/spte.c31
-rw-r--r--arch/x86/kvm/mmu/spte.h2
-rw-r--r--arch/x86/kvm/mmu/tdp_iter.h34
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c53
-rw-r--r--arch/x86/kvm/smm.c2
-rw-r--r--arch/x86/kvm/svm/nested.c2
-rw-r--r--arch/x86/kvm/svm/sev.c373
-rw-r--r--arch/x86/kvm/svm/svm.c56
-rw-r--r--arch/x86/kvm/svm/svm.h39
-rw-r--r--arch/x86/kvm/trace.h14
-rw-r--r--arch/x86/kvm/vmx/nested.c18
-rw-r--r--arch/x86/kvm/vmx/nested.h22
-rw-r--r--arch/x86/kvm/vmx/posted_intr.c37
-rw-r--r--arch/x86/kvm/vmx/vmx.c224
-rw-r--r--arch/x86/kvm/x86.c346
-rw-r--r--arch/x86/kvm/x86.h8
-rw-r--r--arch/x86/kvm/xen.c121
-rw-r--r--arch/x86/kvm/xen.h30
-rw-r--r--drivers/block/null_blk/main.c4
-rw-r--r--drivers/block/virtio_blk.c5
-rw-r--r--drivers/bluetooth/Kconfig12
-rw-r--r--drivers/bluetooth/btusb.c41
-rw-r--r--drivers/clk/qcom/dispcc-sm8750.c2
-rw-r--r--drivers/clk/samsung/clk-gs101.c8
-rw-r--r--drivers/clk/samsung/clk-pll.c7
-rw-r--r--drivers/gpio/gpiolib-cdev.c15
-rw-r--r--drivers/gpio/gpiolib.c35
-rw-r--r--drivers/gpio/gpiolib.h5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v2_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c8
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c17
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c1
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c64
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c7
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_resource.c7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c1
-rw-r--r--drivers/gpu/drm/display/drm_dp_mst_topology.c40
-rw-r--r--drivers/gpu/drm/drm_atomic_uapi.c4
-rw-r--r--drivers/gpu/drm/drm_connector.c4
-rw-r--r--drivers/gpu/drm/drm_panic_qr.rs16
-rw-r--r--drivers/gpu/drm/gma500/mid_bios.c5
-rw-r--r--drivers/gpu/drm/hyperv/hyperv_drm_drv.c2
-rw-r--r--drivers/gpu/drm/i915/display/intel_display.c5
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_mman.c5
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pc.c53
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit.c2
-rw-r--r--drivers/gpu/drm/xe/xe_hmm.c6
-rw-r--r--drivers/gpu/drm/xe/xe_pm.c13
-rw-r--r--drivers/gpu/drm/xe/xe_vm.c3
-rw-r--r--drivers/hv/vmbus_drv.c13
-rw-r--r--drivers/i2c/busses/i2c-ali1535.c12
-rw-r--r--drivers/i2c/busses/i2c-ali15x3.c12
-rw-r--r--drivers/i2c/busses/i2c-omap.c26
-rw-r--r--drivers/i2c/busses/i2c-sis630.c12
-rw-r--r--drivers/input/joystick/xpad.c39
-rw-r--r--drivers/input/misc/iqs7222.c50
-rw-r--r--drivers/input/serio/i8042-acpipnpio.h111
-rw-r--r--drivers/input/touchscreen/ads7846.c2
-rw-r--r--drivers/input/touchscreen/goodix_berlin_core.c26
-rw-r--r--drivers/input/touchscreen/imagis.c9
-rw-r--r--drivers/input/touchscreen/wdt87xx_i2c.c2
-rw-r--r--drivers/leds/leds-st1202.c21
-rw-r--r--drivers/md/dm-flakey.c2
-rw-r--r--drivers/media/dvb-frontends/rtl2832_sdr.c2
-rw-r--r--drivers/net/bonding/bond_options.c55
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.c59
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c25
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c13
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h3
-rw-r--r--drivers/net/ethernet/intel/ice/ice_arfs.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_eswitch.c6
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lag.c27
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.c18
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.h4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/devlink.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c13
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.c52
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v2.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v3.c42
-rw-r--r--drivers/net/ethernet/microsoft/mana/gdma_main.c11
-rw-r--r--drivers/net/ethernet/microsoft/mana/mana_en.c10
-rw-r--r--drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c8
-rw-r--r--drivers/net/ethernet/realtek/rtase/rtase_main.c10
-rw-r--r--drivers/net/mctp/mctp-i2c.c5
-rw-r--r--drivers/net/mctp/mctp-i3c.c5
-rw-r--r--drivers/net/phy/nxp-c45-tja11xx.c68
-rw-r--r--drivers/net/usb/lan78xx.c4
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-trans.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/fw.c6
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/tx.c11
-rw-r--r--drivers/nvme/host/apple.c3
-rw-r--r--drivers/nvme/host/core.c12
-rw-r--r--drivers/nvme/host/pci.c18
-rw-r--r--drivers/nvme/target/pci-epf.c28
-rw-r--r--drivers/pinctrl/bcm/pinctrl-bcm281xx.c2
-rw-r--r--drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c3
-rw-r--r--drivers/pinctrl/spacemit/Kconfig3
-rw-r--r--drivers/pinctrl/spacemit/pinctrl-k1.c2
-rw-r--r--drivers/platform/surface/surface_aggregator_registry.c5
-rw-r--r--drivers/platform/x86/amd/pmf/spc.c2
-rw-r--r--drivers/platform/x86/amd/pmf/tee-if.c36
-rw-r--r--drivers/spi/atmel-quadspi.c5
-rw-r--r--drivers/spi/spi-microchip-core.c41
-rw-r--r--drivers/thunderbolt/tunnel.c11
-rw-r--r--drivers/thunderbolt/tunnel.h2
-rw-r--r--drivers/usb/serial/ftdi_sio.c14
-rw-r--r--drivers/usb/serial/ftdi_sio_ids.h13
-rw-r--r--drivers/usb/serial/option.c48
-rw-r--r--drivers/usb/typec/tcpm/tcpm.c8
-rw-r--r--drivers/video/fbdev/hyperv_fb.c52
-rw-r--r--fs/afs/cell.c11
-rw-r--r--fs/afs/dynroot.c15
-rw-r--r--fs/afs/internal.h2
-rw-r--r--fs/afs/proc.c4
-rw-r--r--fs/bcachefs/btree_io.c2
-rw-r--r--fs/bcachefs/btree_update.h8
-rw-r--r--fs/bcachefs/btree_write_buffer.c21
-rw-r--r--fs/bcachefs/extents.c2
-rw-r--r--fs/bcachefs/inode.c1
-rw-r--r--fs/bcachefs/io_read.c19
-rw-r--r--fs/bcachefs/super.c11
-rw-r--r--fs/bcachefs/util.c24
-rw-r--r--fs/bcachefs/util.h2
-rw-r--r--fs/ext4/file.c3
-rw-r--r--fs/smb/client/connect.c16
-rw-r--r--fs/smb/client/fs_context.c18
-rw-r--r--fs/smb/server/connection.c20
-rw-r--r--fs/smb/server/connection.h2
-rw-r--r--fs/smb/server/ksmbd_work.c3
-rw-r--r--fs/smb/server/ksmbd_work.h1
-rw-r--r--fs/smb/server/oplock.c43
-rw-r--r--fs/smb/server/oplock.h1
-rw-r--r--fs/smb/server/server.c14
-rw-r--r--fs/vboxsf/super.c3
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c8
-rw-r--r--fs/xfs/xfs_file.c13
-rw-r--r--include/linux/blk-mq.h16
-rw-r--r--include/linux/cleanup.h2
-rw-r--r--include/linux/fsnotify.h21
-rw-r--r--include/linux/kvm_host.h4
-rw-r--r--include/linux/mm.h1
-rw-r--r--include/net/bluetooth/hci_core.h108
-rw-r--r--include/net/netfilter/nf_tables.h4
-rw-r--r--include/sound/soc.h5
-rw-r--r--init/Kconfig2
-rw-r--r--kernel/locking/rtmutex_common.h4
-rw-r--r--kernel/locking/semaphore.c13
-rw-r--r--kernel/sched/cputime.c8
-rw-r--r--kernel/sched/ext.c3
-rw-r--r--kernel/sched/sched.h4
-rw-r--r--kernel/trace/trace_events_hist.c24
-rw-r--r--mm/filemap.c86
-rw-r--r--mm/memory.c19
-rw-r--r--mm/nommu.c7
-rw-r--r--mm/readahead.c14
-rw-r--r--mm/util.c3
-rw-r--r--net/bluetooth/hci_core.c10
-rw-r--r--net/bluetooth/hci_event.c37
-rw-r--r--net/bluetooth/iso.c6
-rw-r--r--net/bluetooth/l2cap_core.c12
-rw-r--r--net/bluetooth/rfcomm/core.c6
-rw-r--r--net/bluetooth/sco.c25
-rw-r--r--net/core/dev.c3
-rw-r--r--net/core/devmem.c4
-rw-r--r--net/core/netpoll.c9
-rw-r--r--net/ethtool/tsinfo.c3
-rw-r--r--net/ipv6/addrconf.c15
-rw-r--r--net/mac80211/eht.c9
-rw-r--r--net/mac80211/rx.c10
-rw-r--r--net/mac80211/sta_info.c20
-rw-r--r--net/mac80211/util.c8
-rw-r--r--net/mctp/route.c10
-rw-r--r--net/mctp/test/route-test.c109
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c8
-rw-r--r--net/netfilter/nf_conncount.c6
-rw-r--r--net/netfilter/nf_tables_api.c24
-rw-r--r--net/netfilter/nft_compat.c8
-rw-r--r--net/netfilter/nft_ct.c6
-rw-r--r--net/netfilter/nft_exthdr.c10
-rw-r--r--net/openvswitch/conntrack.c30
-rw-r--r--net/openvswitch/datapath.h3
-rw-r--r--net/openvswitch/flow_netlink.c15
-rw-r--r--net/sched/sch_api.c6
-rw-r--r--net/sched/sch_gred.c3
-rw-r--r--net/switchdev/switchdev.c25
-rw-r--r--net/wireless/core.c7
-rw-r--r--net/wireless/nl80211.c12
-rw-r--r--rust/kernel/alloc/allocator_test.rs18
-rw-r--r--rust/kernel/error.rs2
-rw-r--r--rust/kernel/init.rs23
-rw-r--r--rust/kernel/init/macros.rs6
-rw-r--r--rust/kernel/lib.rs2
-rw-r--r--rust/kernel/sync.rs16
-rw-r--r--rust/kernel/sync/locked_by.rs2
-rw-r--r--rust/kernel/task.rs2
-rw-r--r--rust/kernel/workqueue.rs6
-rwxr-xr-xscripts/generate_rust_analyzer.py71
-rw-r--r--scripts/rustdoc_test_gen.rs4
-rw-r--r--sound/pci/hda/patch_realtek.c21
-rw-r--r--sound/soc/amd/yc/acp6x-mach.c7
-rw-r--r--sound/soc/codecs/cs42l43-jack.c13
-rw-r--r--sound/soc/codecs/cs42l43.c17
-rw-r--r--sound/soc/codecs/cs42l43.h3
-rw-r--r--sound/soc/codecs/rt1320-sdw.c3
-rw-r--r--sound/soc/codecs/rt722-sdca-sdw.c4
-rw-r--r--sound/soc/codecs/wm0010.c13
-rw-r--r--sound/soc/codecs/wsa884x.c4
-rw-r--r--sound/soc/intel/boards/sof_sdw.c2
-rw-r--r--sound/soc/soc-ops.c15
-rw-r--r--sound/soc/tegra/tegra210_adx.c4
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_options.sh4
-rwxr-xr-xtools/testing/selftests/drivers/net/ping.py200
-rw-r--r--tools/testing/selftests/kvm/Makefile.kvm46
-rw-r--r--tools/testing/selftests/kvm/access_tracking_perf_test.c2
-rw-r--r--tools/testing/selftests/kvm/dirty_log_test.c523
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h33
-rw-r--r--tools/testing/selftests/kvm/include/test_util.h2
-rw-r--r--tools/testing/selftests/kvm/include/x86/processor.h50
-rw-r--r--tools/testing/selftests/kvm/kvm_create_max_vcpus.c28
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c119
-rw-r--r--tools/testing/selftests/kvm/lib/userfaultfd_util.c2
-rw-r--r--tools/testing/selftests/kvm/riscv/sbi_pmu_test.c81
-rw-r--r--tools/testing/selftests/kvm/rseq_test.c31
-rw-r--r--tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c6
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_ipi.c6
-rw-r--r--tools/testing/selftests/kvm/x86/monitor_mwait_test.c108
-rw-r--r--tools/testing/selftests/kvm/x86/nested_emulation_test.c146
-rw-r--r--tools/testing/selftests/kvm/x86/nx_huge_pages_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86/pmu_counters_test.c158
-rw-r--r--tools/testing/selftests/kvm/x86/svm_int_ctl_test.c5
-rw-r--r--tools/testing/selftests/kvm/x86/ucna_injection_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86/xapic_ipi_test.c16
-rw-r--r--tools/testing/selftests/kvm/x86/xapic_state_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86/xen_shinfo_test.c5
-rw-r--r--tools/testing/selftests/net/Makefile1
-rwxr-xr-xtools/testing/selftests/net/gre_ipv6_lladdr.sh177
-rw-r--r--tools/testing/selftests/net/lib/xdp_dummy.bpf.c6
-rwxr-xr-xtools/testing/selftests/net/netfilter/br_netfilter.sh7
-rwxr-xr-xtools/testing/selftests/net/netfilter/br_netfilter_queue.sh7
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_queue.sh1
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json25
-rw-r--r--virt/kvm/Kconfig6
-rw-r--r--virt/kvm/eventfd.c10
-rw-r--r--virt/kvm/kvm_main.c64
317 files changed, 5008 insertions, 2997 deletions
diff --git a/.mailmap b/.mailmap
index 72bf830e1e58..be60c13d2ee1 100644
--- a/.mailmap
+++ b/.mailmap
@@ -281,6 +281,7 @@ Henrik Rydberg <rydberg@bitmath.org>
Herbert Xu <herbert@gondor.apana.org.au>
Huacai Chen <chenhuacai@kernel.org> <chenhc@lemote.com>
Huacai Chen <chenhuacai@kernel.org> <chenhuacai@loongson.cn>
+Ike Panhc <ikepanhc@gmail.com> <ike.pan@canonical.com>
J. Bruce Fields <bfields@fieldses.org> <bfields@redhat.com>
J. Bruce Fields <bfields@fieldses.org> <bfields@citi.umich.edu>
Jacob Shin <Jacob.Shin@amd.com>
diff --git a/Documentation/devicetree/bindings/input/touchscreen/imagis,ist3038c.yaml b/Documentation/devicetree/bindings/input/touchscreen/imagis,ist3038c.yaml
index e24cbd960993..bd8ede3a4ad8 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/imagis,ist3038c.yaml
+++ b/Documentation/devicetree/bindings/input/touchscreen/imagis,ist3038c.yaml
@@ -19,6 +19,7 @@ properties:
- imagis,ist3038
- imagis,ist3038b
- imagis,ist3038c
+ - imagis,ist3038h
reg:
maxItems: 1
diff --git a/Documentation/rust/quick-start.rst b/Documentation/rust/quick-start.rst
index 4aa50e5fcb8c..6d2607870ba4 100644
--- a/Documentation/rust/quick-start.rst
+++ b/Documentation/rust/quick-start.rst
@@ -145,7 +145,7 @@ Rust standard library source
****************************
The Rust standard library source is required because the build system will
-cross-compile ``core`` and ``alloc``.
+cross-compile ``core``.
If ``rustup`` is being used, run::
diff --git a/Documentation/rust/testing.rst b/Documentation/rust/testing.rst
index 568b71b415a4..180b886e0f1e 100644
--- a/Documentation/rust/testing.rst
+++ b/Documentation/rust/testing.rst
@@ -97,7 +97,7 @@ operator are also supported as usual, e.g.:
/// ```
/// # use kernel::{spawn_work_item, workqueue};
- /// spawn_work_item!(workqueue::system(), || pr_info!("x"))?;
+ /// spawn_work_item!(workqueue::system(), || pr_info!("x\n"))?;
/// # Ok::<(), Error>(())
/// ```
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 3468a2a7de6b..47c7c3f92314 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -1000,6 +1000,10 @@ blobs in userspace. When the guest writes the MSR, kvm copies one
page of a blob (32- or 64-bit, depending on the vcpu mode) to guest
memory.
+The MSR index must be in the range [0x40000000, 0x4fffffff], i.e. must reside
+in the range that is unofficially reserved for use by hypervisors. The min/max
+values are enumerated via KVM_XEN_MSR_MIN_INDEX and KVM_XEN_MSR_MAX_INDEX.
+
::
struct kvm_xen_hvm_config {
@@ -7443,6 +7447,75 @@ Unused bitfields in the bitarrays must be set to zero.
This capability connects the vcpu to an in-kernel XIVE device.
+6.76 KVM_CAP_HYPERV_SYNIC
+-------------------------
+
+:Architectures: x86
+:Target: vcpu
+
+This capability, if KVM_CHECK_EXTENSION indicates that it is
+available, means that the kernel has an implementation of the
+Hyper-V Synthetic interrupt controller(SynIC). Hyper-V SynIC is
+used to support Windows Hyper-V based guest paravirt drivers(VMBus).
+
+In order to use SynIC, it has to be activated by setting this
+capability via KVM_ENABLE_CAP ioctl on the vcpu fd. Note that this
+will disable the use of APIC hardware virtualization even if supported
+by the CPU, as it's incompatible with SynIC auto-EOI behavior.
+
+6.77 KVM_CAP_HYPERV_SYNIC2
+--------------------------
+
+:Architectures: x86
+:Target: vcpu
+
+This capability enables a newer version of Hyper-V Synthetic interrupt
+controller (SynIC). The only difference with KVM_CAP_HYPERV_SYNIC is that KVM
+doesn't clear SynIC message and event flags pages when they are enabled by
+writing to the respective MSRs.
+
+6.78 KVM_CAP_HYPERV_DIRECT_TLBFLUSH
+-----------------------------------
+
+:Architectures: x86
+:Target: vcpu
+
+This capability indicates that KVM running on top of Hyper-V hypervisor
+enables Direct TLB flush for its guests meaning that TLB flush
+hypercalls are handled by Level 0 hypervisor (Hyper-V) bypassing KVM.
+Due to the different ABI for hypercall parameters between Hyper-V and
+KVM, enabling this capability effectively disables all hypercall
+handling by KVM (as some KVM hypercall may be mistakenly treated as TLB
+flush hypercalls by Hyper-V) so userspace should disable KVM identification
+in CPUID and only exposes Hyper-V identification. In this case, guest
+thinks it's running on Hyper-V and only use Hyper-V hypercalls.
+
+6.79 KVM_CAP_HYPERV_ENFORCE_CPUID
+---------------------------------
+
+:Architectures: x86
+:Target: vcpu
+
+When enabled, KVM will disable emulated Hyper-V features provided to the
+guest according to the bits Hyper-V CPUID feature leaves. Otherwise, all
+currently implemented Hyper-V features are provided unconditionally when
+Hyper-V identification is set in the HYPERV_CPUID_INTERFACE (0x40000001)
+leaf.
+
+6.80 KVM_CAP_ENFORCE_PV_FEATURE_CPUID
+-------------------------------------
+
+:Architectures: x86
+:Target: vcpu
+
+When enabled, KVM will disable paravirtual features provided to the
+guest according to the bits in the KVM_CPUID_FEATURES CPUID leaf
+(0x40000001). Otherwise, a guest may use the paravirtual features
+regardless of what has actually been exposed through the CPUID leaf.
+
+.. _KVM_CAP_DIRTY_LOG_RING:
+
+
.. _cap_enable_vm:
7. Capabilities that can be enabled on VMs
@@ -7923,10 +7996,10 @@ by POWER10 processor.
7.24 KVM_CAP_VM_COPY_ENC_CONTEXT_FROM
-------------------------------------
-Architectures: x86 SEV enabled
-Type: vm
-Parameters: args[0] is the fd of the source vm
-Returns: 0 on success; ENOTTY on error
+:Architectures: x86 SEV enabled
+:Type: vm
+:Parameters: args[0] is the fd of the source vm
+:Returns: 0 on success; ENOTTY on error
This capability enables userspace to copy encryption context from the vm
indicated by the fd to the vm this is called on.
@@ -7959,24 +8032,6 @@ default.
See Documentation/arch/x86/sgx.rst for more details.
-7.26 KVM_CAP_PPC_RPT_INVALIDATE
--------------------------------
-
-:Capability: KVM_CAP_PPC_RPT_INVALIDATE
-:Architectures: ppc
-:Type: vm
-
-This capability indicates that the kernel is capable of handling
-H_RPT_INVALIDATE hcall.
-
-In order to enable the use of H_RPT_INVALIDATE in the guest,
-user space might have to advertise it for the guest. For example,
-IBM pSeries (sPAPR) guest starts using it if "hcall-rpt-invalidate" is
-present in the "ibm,hypertas-functions" device-tree property.
-
-This capability is enabled for hypervisors on platforms like POWER9
-that support radix MMU.
-
7.27 KVM_CAP_EXIT_ON_EMULATION_FAILURE
--------------------------------------
@@ -8034,24 +8089,9 @@ indicated by the fd to the VM this is called on.
This is intended to support intra-host migration of VMs between userspace VMMs,
upgrading the VMM process without interrupting the guest.
-7.30 KVM_CAP_PPC_AIL_MODE_3
--------------------------------
-
-:Capability: KVM_CAP_PPC_AIL_MODE_3
-:Architectures: ppc
-:Type: vm
-
-This capability indicates that the kernel supports the mode 3 setting for the
-"Address Translation Mode on Interrupt" aka "Alternate Interrupt Location"
-resource that is controlled with the H_SET_MODE hypercall.
-
-This capability allows a guest kernel to use a better-performance mode for
-handling interrupts and system calls.
-
7.31 KVM_CAP_DISABLE_QUIRKS2
----------------------------
-:Capability: KVM_CAP_DISABLE_QUIRKS2
:Parameters: args[0] - set of KVM quirks to disable
:Architectures: x86
:Type: vm
@@ -8206,27 +8246,6 @@ This capability is aimed to mitigate the threat that malicious VMs can
cause CPU stuck (due to event windows don't open up) and make the CPU
unavailable to host or other VMs.
-7.34 KVM_CAP_MEMORY_FAULT_INFO
-------------------------------
-
-:Architectures: x86
-:Returns: Informational only, -EINVAL on direct KVM_ENABLE_CAP.
-
-The presence of this capability indicates that KVM_RUN will fill
-kvm_run.memory_fault if KVM cannot resolve a guest page fault VM-Exit, e.g. if
-there is a valid memslot but no backing VMA for the corresponding host virtual
-address.
-
-The information in kvm_run.memory_fault is valid if and only if KVM_RUN returns
-an error with errno=EFAULT or errno=EHWPOISON *and* kvm_run.exit_reason is set
-to KVM_EXIT_MEMORY_FAULT.
-
-Note: Userspaces which attempt to resolve memory faults so that they can retry
-KVM_RUN are encouraged to guard against repeatedly receiving the same
-error/annotated fault.
-
-See KVM_EXIT_MEMORY_FAULT for more information.
-
7.35 KVM_CAP_X86_APIC_BUS_CYCLES_NS
-----------------------------------
@@ -8244,19 +8263,220 @@ by KVM_CHECK_EXTENSION.
Note: Userspace is responsible for correctly configuring CPUID 0x15, a.k.a. the
core crystal clock frequency, if a non-zero CPUID 0x15 is exposed to the guest.
-7.36 KVM_CAP_X86_GUEST_MODE
-------------------------------
+7.36 KVM_CAP_DIRTY_LOG_RING/KVM_CAP_DIRTY_LOG_RING_ACQ_REL
+----------------------------------------------------------
+
+:Architectures: x86, arm64
+:Type: vm
+:Parameters: args[0] - size of the dirty log ring
+
+KVM is capable of tracking dirty memory using ring buffers that are
+mmapped into userspace; there is one dirty ring per vcpu.
+
+The dirty ring is available to userspace as an array of
+``struct kvm_dirty_gfn``. Each dirty entry is defined as::
+
+ struct kvm_dirty_gfn {
+ __u32 flags;
+ __u32 slot; /* as_id | slot_id */
+ __u64 offset;
+ };
+
+The following values are defined for the flags field to define the
+current state of the entry::
+
+ #define KVM_DIRTY_GFN_F_DIRTY BIT(0)
+ #define KVM_DIRTY_GFN_F_RESET BIT(1)
+ #define KVM_DIRTY_GFN_F_MASK 0x3
+
+Userspace should call KVM_ENABLE_CAP ioctl right after KVM_CREATE_VM
+ioctl to enable this capability for the new guest and set the size of
+the rings. Enabling the capability is only allowed before creating any
+vCPU, and the size of the ring must be a power of two. The larger the
+ring buffer, the less likely the ring is full and the VM is forced to
+exit to userspace. The optimal size depends on the workload, but it is
+recommended that it be at least 64 KiB (4096 entries).
+
+Just like for dirty page bitmaps, the buffer tracks writes to
+all user memory regions for which the KVM_MEM_LOG_DIRTY_PAGES flag was
+set in KVM_SET_USER_MEMORY_REGION. Once a memory region is registered
+with the flag set, userspace can start harvesting dirty pages from the
+ring buffer.
+
+An entry in the ring buffer can be unused (flag bits ``00``),
+dirty (flag bits ``01``) or harvested (flag bits ``1X``). The
+state machine for the entry is as follows::
+
+ dirtied harvested reset
+ 00 -----------> 01 -------------> 1X -------+
+ ^ |
+ | |
+ +------------------------------------------+
+
+To harvest the dirty pages, userspace accesses the mmapped ring buffer
+to read the dirty GFNs. If the flags has the DIRTY bit set (at this stage
+the RESET bit must be cleared), then it means this GFN is a dirty GFN.
+The userspace should harvest this GFN and mark the flags from state
+``01b`` to ``1Xb`` (bit 0 will be ignored by KVM, but bit 1 must be set
+to show that this GFN is harvested and waiting for a reset), and move
+on to the next GFN. The userspace should continue to do this until the
+flags of a GFN have the DIRTY bit cleared, meaning that it has harvested
+all the dirty GFNs that were available.
+
+Note that on weakly ordered architectures, userspace accesses to the
+ring buffer (and more specifically the 'flags' field) must be ordered,
+using load-acquire/store-release accessors when available, or any
+other memory barrier that will ensure this ordering.
+
+It's not necessary for userspace to harvest the all dirty GFNs at once.
+However it must collect the dirty GFNs in sequence, i.e., the userspace
+program cannot skip one dirty GFN to collect the one next to it.
+
+After processing one or more entries in the ring buffer, userspace
+calls the VM ioctl KVM_RESET_DIRTY_RINGS to notify the kernel about
+it, so that the kernel will reprotect those collected GFNs.
+Therefore, the ioctl must be called *before* reading the content of
+the dirty pages.
+
+The dirty ring can get full. When it happens, the KVM_RUN of the
+vcpu will return with exit reason KVM_EXIT_DIRTY_LOG_FULL.
+
+The dirty ring interface has a major difference comparing to the
+KVM_GET_DIRTY_LOG interface in that, when reading the dirty ring from
+userspace, it's still possible that the kernel has not yet flushed the
+processor's dirty page buffers into the kernel buffer (with dirty bitmaps, the
+flushing is done by the KVM_GET_DIRTY_LOG ioctl). To achieve that, one
+needs to kick the vcpu out of KVM_RUN using a signal. The resulting
+vmexit ensures that all dirty GFNs are flushed to the dirty rings.
+
+NOTE: KVM_CAP_DIRTY_LOG_RING_ACQ_REL is the only capability that
+should be exposed by weakly ordered architecture, in order to indicate
+the additional memory ordering requirements imposed on userspace when
+reading the state of an entry and mutating it from DIRTY to HARVESTED.
+Architecture with TSO-like ordering (such as x86) are allowed to
+expose both KVM_CAP_DIRTY_LOG_RING and KVM_CAP_DIRTY_LOG_RING_ACQ_REL
+to userspace.
+
+After enabling the dirty rings, the userspace needs to detect the
+capability of KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP to see whether the
+ring structures can be backed by per-slot bitmaps. With this capability
+advertised, it means the architecture can dirty guest pages without
+vcpu/ring context, so that some of the dirty information will still be
+maintained in the bitmap structure. KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP
+can't be enabled if the capability of KVM_CAP_DIRTY_LOG_RING_ACQ_REL
+hasn't been enabled, or any memslot has been existing.
+
+Note that the bitmap here is only a backup of the ring structure. The
+use of the ring and bitmap combination is only beneficial if there is
+only a very small amount of memory that is dirtied out of vcpu/ring
+context. Otherwise, the stand-alone per-slot bitmap mechanism needs to
+be considered.
+
+To collect dirty bits in the backup bitmap, userspace can use the same
+KVM_GET_DIRTY_LOG ioctl. KVM_CLEAR_DIRTY_LOG isn't needed as long as all
+the generation of the dirty bits is done in a single pass. Collecting
+the dirty bitmap should be the very last thing that the VMM does before
+considering the state as complete. VMM needs to ensure that the dirty
+state is final and avoid missing dirty pages from another ioctl ordered
+after the bitmap collection.
+
+NOTE: Multiple examples of using the backup bitmap: (1) save vgic/its
+tables through command KVM_DEV_ARM_{VGIC_GRP_CTRL, ITS_SAVE_TABLES} on
+KVM device "kvm-arm-vgic-its". (2) restore vgic/its tables through
+command KVM_DEV_ARM_{VGIC_GRP_CTRL, ITS_RESTORE_TABLES} on KVM device
+"kvm-arm-vgic-its". VGICv3 LPI pending status is restored. (3) save
+vgic3 pending table through KVM_DEV_ARM_VGIC_{GRP_CTRL, SAVE_PENDING_TABLES}
+command on KVM device "kvm-arm-vgic-v3".
+
+7.37 KVM_CAP_PMU_CAPABILITY
+---------------------------
:Architectures: x86
-:Returns: Informational only, -EINVAL on direct KVM_ENABLE_CAP.
+:Type: vm
+:Parameters: arg[0] is bitmask of PMU virtualization capabilities.
+:Returns: 0 on success, -EINVAL when arg[0] contains invalid bits
-The presence of this capability indicates that KVM_RUN will update the
-KVM_RUN_X86_GUEST_MODE bit in kvm_run.flags to indicate whether the
-vCPU was executing nested guest code when it exited.
+This capability alters PMU virtualization in KVM.
-KVM exits with the register state of either the L1 or L2 guest
-depending on which executed at the time of an exit. Userspace must
-take care to differentiate between these cases.
+Calling KVM_CHECK_EXTENSION for this capability returns a bitmask of
+PMU virtualization capabilities that can be adjusted on a VM.
+
+The argument to KVM_ENABLE_CAP is also a bitmask and selects specific
+PMU virtualization capabilities to be applied to the VM. This can
+only be invoked on a VM prior to the creation of VCPUs.
+
+At this time, KVM_PMU_CAP_DISABLE is the only capability. Setting
+this capability will disable PMU virtualization for that VM. Usermode
+should adjust CPUID leaf 0xA to reflect that the PMU is disabled.
+
+7.38 KVM_CAP_VM_DISABLE_NX_HUGE_PAGES
+-------------------------------------
+
+:Architectures: x86
+:Type: vm
+:Parameters: arg[0] must be 0.
+:Returns: 0 on success, -EPERM if the userspace process does not
+ have CAP_SYS_BOOT, -EINVAL if args[0] is not 0 or any vCPUs have been
+ created.
+
+This capability disables the NX huge pages mitigation for iTLB MULTIHIT.
+
+The capability has no effect if the nx_huge_pages module parameter is not set.
+
+This capability may only be set before any vCPUs are created.
+
+7.39 KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE
+---------------------------------------
+
+:Architectures: arm64
+:Type: vm
+:Parameters: arg[0] is the new split chunk size.
+:Returns: 0 on success, -EINVAL if any memslot was already created.
+
+This capability sets the chunk size used in Eager Page Splitting.
+
+Eager Page Splitting improves the performance of dirty-logging (used
+in live migrations) when guest memory is backed by huge-pages. It
+avoids splitting huge-pages (into PAGE_SIZE pages) on fault, by doing
+it eagerly when enabling dirty logging (with the
+KVM_MEM_LOG_DIRTY_PAGES flag for a memory region), or when using
+KVM_CLEAR_DIRTY_LOG.
+
+The chunk size specifies how many pages to break at a time, using a
+single allocation for each chunk. Bigger the chunk size, more pages
+need to be allocated ahead of time.
+
+The chunk size needs to be a valid block size. The list of acceptable
+block sizes is exposed in KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES as a
+64-bit bitmap (each bit describing a block size). The default value is
+0, to disable the eager page splitting.
+
+7.40 KVM_CAP_EXIT_HYPERCALL
+---------------------------
+
+:Architectures: x86
+:Type: vm
+
+This capability, if enabled, will cause KVM to exit to userspace
+with KVM_EXIT_HYPERCALL exit reason to process some hypercalls.
+
+Calling KVM_CHECK_EXTENSION for this capability will return a bitmask
+of hypercalls that can be configured to exit to userspace.
+Right now, the only such hypercall is KVM_HC_MAP_GPA_RANGE.
+
+The argument to KVM_ENABLE_CAP is also a bitmask, and must be a subset
+of the result of KVM_CHECK_EXTENSION. KVM will forward to userspace
+the hypercalls whose corresponding bit is in the argument, and return
+ENOSYS for the others.
+
+7.41 KVM_CAP_ARM_SYSTEM_SUSPEND
+-------------------------------
+
+:Architectures: arm64
+:Type: vm
+
+When enabled, KVM will exit to userspace with KVM_EXIT_SYSTEM_EVENT of
+type KVM_SYSTEM_EVENT_SUSPEND to process the guest suspend request.
7.37 KVM_CAP_ARM_WRITABLE_IMP_ID_REGS
-------------------------------------
@@ -8293,21 +8513,6 @@ H_RANDOM hypercall backed by a hardware random-number generator.
If present, the kernel H_RANDOM handler can be enabled for guest use
with the KVM_CAP_PPC_ENABLE_HCALL capability.
-8.2 KVM_CAP_HYPERV_SYNIC
-------------------------
-
-:Architectures: x86
-
-This capability, if KVM_CHECK_EXTENSION indicates that it is
-available, means that the kernel has an implementation of the
-Hyper-V Synthetic interrupt controller(SynIC). Hyper-V SynIC is
-used to support Windows Hyper-V based guest paravirt drivers(VMBus).
-
-In order to use SynIC, it has to be activated by setting this
-capability via KVM_ENABLE_CAP ioctl on the vcpu fd. Note that this
-will disable the use of APIC hardware virtualization even if supported
-by the CPU, as it's incompatible with SynIC auto-EOI behavior.
-
8.3 KVM_CAP_PPC_MMU_RADIX
-------------------------
@@ -8358,20 +8563,6 @@ may be incompatible with the MIPS VZ ASE.
virtualization, including standard guest virtual memory segments.
== ==========================================================================
-8.6 KVM_CAP_MIPS_TE
--------------------
-
-:Architectures: mips
-
-This capability, if KVM_CHECK_EXTENSION on the main kvm handle indicates that
-it is available, means that the trap & emulate implementation is available to
-run guest code in user mode, even if KVM_CAP_MIPS_VZ indicates that hardware
-assisted virtualisation is also available. KVM_VM_MIPS_TE (0) must be passed
-to KVM_CREATE_VM to create a VM which utilises it.
-
-If KVM_CHECK_EXTENSION on a kvm VM handle indicates that this capability is
-available, it means that the VM is using trap & emulate.
-
8.7 KVM_CAP_MIPS_64BIT
----------------------
@@ -8453,16 +8644,6 @@ virtual SMT modes that can be set using KVM_CAP_PPC_SMT. If bit N
(counting from the right) is set, then a virtual SMT mode of 2^N is
available.
-8.11 KVM_CAP_HYPERV_SYNIC2
---------------------------
-
-:Architectures: x86
-
-This capability enables a newer version of Hyper-V Synthetic interrupt
-controller (SynIC). The only difference with KVM_CAP_HYPERV_SYNIC is that KVM
-doesn't clear SynIC message and event flags pages when they are enabled by
-writing to the respective MSRs.
-
8.12 KVM_CAP_HYPERV_VP_INDEX
----------------------------
@@ -8477,7 +8658,6 @@ capability is absent, userspace can still query this msr's value.
-------------------------------
:Architectures: s390
-:Parameters: none
This capability indicates if the flic device will be able to get/set the
AIS states for migration via the KVM_DEV_FLIC_AISM_ALL attribute and allows
@@ -8551,21 +8731,6 @@ This capability indicates that KVM supports paravirtualized Hyper-V IPI send
hypercalls:
HvCallSendSyntheticClusterIpi, HvCallSendSyntheticClusterIpiEx.
-8.21 KVM_CAP_HYPERV_DIRECT_TLBFLUSH
------------------------------------
-
-:Architectures: x86
-
-This capability indicates that KVM running on top of Hyper-V hypervisor
-enables Direct TLB flush for its guests meaning that TLB flush
-hypercalls are handled by Level 0 hypervisor (Hyper-V) bypassing KVM.
-Due to the different ABI for hypercall parameters between Hyper-V and
-KVM, enabling this capability effectively disables all hypercall
-handling by KVM (as some KVM hypercall may be mistakenly treated as TLB
-flush hypercalls by Hyper-V) so userspace should disable KVM identification
-in CPUID and only exposes Hyper-V identification. In this case, guest
-thinks it's running on Hyper-V and only use Hyper-V hypercalls.
-
8.22 KVM_CAP_S390_VCPU_RESETS
-----------------------------
@@ -8643,142 +8808,6 @@ In combination with KVM_CAP_X86_USER_SPACE_MSR, this allows user space to
trap and emulate MSRs that are outside of the scope of KVM as well as
limit the attack surface on KVM's MSR emulation code.
-8.28 KVM_CAP_ENFORCE_PV_FEATURE_CPUID
--------------------------------------
-
-Architectures: x86
-
-When enabled, KVM will disable paravirtual features provided to the
-guest according to the bits in the KVM_CPUID_FEATURES CPUID leaf
-(0x40000001). Otherwise, a guest may use the paravirtual features
-regardless of what has actually been exposed through the CPUID leaf.
-
-.. _KVM_CAP_DIRTY_LOG_RING:
-
-8.29 KVM_CAP_DIRTY_LOG_RING/KVM_CAP_DIRTY_LOG_RING_ACQ_REL
-----------------------------------------------------------
-
-:Architectures: x86, arm64
-:Parameters: args[0] - size of the dirty log ring
-
-KVM is capable of tracking dirty memory using ring buffers that are
-mmapped into userspace; there is one dirty ring per vcpu.
-
-The dirty ring is available to userspace as an array of
-``struct kvm_dirty_gfn``. Each dirty entry is defined as::
-
- struct kvm_dirty_gfn {
- __u32 flags;
- __u32 slot; /* as_id | slot_id */
- __u64 offset;
- };
-
-The following values are defined for the flags field to define the
-current state of the entry::
-
- #define KVM_DIRTY_GFN_F_DIRTY BIT(0)
- #define KVM_DIRTY_GFN_F_RESET BIT(1)
- #define KVM_DIRTY_GFN_F_MASK 0x3
-
-Userspace should call KVM_ENABLE_CAP ioctl right after KVM_CREATE_VM
-ioctl to enable this capability for the new guest and set the size of
-the rings. Enabling the capability is only allowed before creating any
-vCPU, and the size of the ring must be a power of two. The larger the
-ring buffer, the less likely the ring is full and the VM is forced to
-exit to userspace. The optimal size depends on the workload, but it is
-recommended that it be at least 64 KiB (4096 entries).
-
-Just like for dirty page bitmaps, the buffer tracks writes to
-all user memory regions for which the KVM_MEM_LOG_DIRTY_PAGES flag was
-set in KVM_SET_USER_MEMORY_REGION. Once a memory region is registered
-with the flag set, userspace can start harvesting dirty pages from the
-ring buffer.
-
-An entry in the ring buffer can be unused (flag bits ``00``),
-dirty (flag bits ``01``) or harvested (flag bits ``1X``). The
-state machine for the entry is as follows::
-
- dirtied harvested reset
- 00 -----------> 01 -------------> 1X -------+
- ^ |
- | |
- +------------------------------------------+
-
-To harvest the dirty pages, userspace accesses the mmapped ring buffer
-to read the dirty GFNs. If the flags has the DIRTY bit set (at this stage
-the RESET bit must be cleared), then it means this GFN is a dirty GFN.
-The userspace should harvest this GFN and mark the flags from state
-``01b`` to ``1Xb`` (bit 0 will be ignored by KVM, but bit 1 must be set
-to show that this GFN is harvested and waiting for a reset), and move
-on to the next GFN. The userspace should continue to do this until the
-flags of a GFN have the DIRTY bit cleared, meaning that it has harvested
-all the dirty GFNs that were available.
-
-Note that on weakly ordered architectures, userspace accesses to the
-ring buffer (and more specifically the 'flags' field) must be ordered,
-using load-acquire/store-release accessors when available, or any
-other memory barrier that will ensure this ordering.
-
-It's not necessary for userspace to harvest the all dirty GFNs at once.
-However it must collect the dirty GFNs in sequence, i.e., the userspace
-program cannot skip one dirty GFN to collect the one next to it.
-
-After processing one or more entries in the ring buffer, userspace
-calls the VM ioctl KVM_RESET_DIRTY_RINGS to notify the kernel about
-it, so that the kernel will reprotect those collected GFNs.
-Therefore, the ioctl must be called *before* reading the content of
-the dirty pages.
-
-The dirty ring can get full. When it happens, the KVM_RUN of the
-vcpu will return with exit reason KVM_EXIT_DIRTY_LOG_FULL.
-
-The dirty ring interface has a major difference comparing to the
-KVM_GET_DIRTY_LOG interface in that, when reading the dirty ring from
-userspace, it's still possible that the kernel has not yet flushed the
-processor's dirty page buffers into the kernel buffer (with dirty bitmaps, the
-flushing is done by the KVM_GET_DIRTY_LOG ioctl). To achieve that, one
-needs to kick the vcpu out of KVM_RUN using a signal. The resulting
-vmexit ensures that all dirty GFNs are flushed to the dirty rings.
-
-NOTE: KVM_CAP_DIRTY_LOG_RING_ACQ_REL is the only capability that
-should be exposed by weakly ordered architecture, in order to indicate
-the additional memory ordering requirements imposed on userspace when
-reading the state of an entry and mutating it from DIRTY to HARVESTED.
-Architecture with TSO-like ordering (such as x86) are allowed to
-expose both KVM_CAP_DIRTY_LOG_RING and KVM_CAP_DIRTY_LOG_RING_ACQ_REL
-to userspace.
-
-After enabling the dirty rings, the userspace needs to detect the
-capability of KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP to see whether the
-ring structures can be backed by per-slot bitmaps. With this capability
-advertised, it means the architecture can dirty guest pages without
-vcpu/ring context, so that some of the dirty information will still be
-maintained in the bitmap structure. KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP
-can't be enabled if the capability of KVM_CAP_DIRTY_LOG_RING_ACQ_REL
-hasn't been enabled, or any memslot has been existing.
-
-Note that the bitmap here is only a backup of the ring structure. The
-use of the ring and bitmap combination is only beneficial if there is
-only a very small amount of memory that is dirtied out of vcpu/ring
-context. Otherwise, the stand-alone per-slot bitmap mechanism needs to
-be considered.
-
-To collect dirty bits in the backup bitmap, userspace can use the same
-KVM_GET_DIRTY_LOG ioctl. KVM_CLEAR_DIRTY_LOG isn't needed as long as all
-the generation of the dirty bits is done in a single pass. Collecting
-the dirty bitmap should be the very last thing that the VMM does before
-considering the state as complete. VMM needs to ensure that the dirty
-state is final and avoid missing dirty pages from another ioctl ordered
-after the bitmap collection.
-
-NOTE: Multiple examples of using the backup bitmap: (1) save vgic/its
-tables through command KVM_DEV_ARM_{VGIC_GRP_CTRL, ITS_SAVE_TABLES} on
-KVM device "kvm-arm-vgic-its". (2) restore vgic/its tables through
-command KVM_DEV_ARM_{VGIC_GRP_CTRL, ITS_RESTORE_TABLES} on KVM device
-"kvm-arm-vgic-its". VGICv3 LPI pending status is restored. (3) save
-vgic3 pending table through KVM_DEV_ARM_VGIC_{GRP_CTRL, SAVE_PENDING_TABLES}
-command on KVM device "kvm-arm-vgic-v3".
-
8.30 KVM_CAP_XEN_HVM
--------------------
@@ -8843,10 +8872,9 @@ clearing the PVCLOCK_TSC_STABLE_BIT flag in Xen pvclock sources. This will be
done when the KVM_CAP_XEN_HVM ioctl sets the
KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE flag.
-8.31 KVM_CAP_PPC_MULTITCE
--------------------------
+8.31 KVM_CAP_SPAPR_MULTITCE
+---------------------------
-:Capability: KVM_CAP_PPC_MULTITCE
:Architectures: ppc
:Type: vm
@@ -8878,72 +8906,9 @@ This capability indicates that the KVM virtual PTP service is
supported in the host. A VMM can check whether the service is
available to the guest on migration.
-8.33 KVM_CAP_HYPERV_ENFORCE_CPUID
----------------------------------
-
-Architectures: x86
-
-When enabled, KVM will disable emulated Hyper-V features provided to the
-guest according to the bits Hyper-V CPUID feature leaves. Otherwise, all
-currently implemented Hyper-V features are provided unconditionally when
-Hyper-V identification is set in the HYPERV_CPUID_INTERFACE (0x40000001)
-leaf.
-
-8.34 KVM_CAP_EXIT_HYPERCALL
----------------------------
-
-:Capability: KVM_CAP_EXIT_HYPERCALL
-:Architectures: x86
-:Type: vm
-
-This capability, if enabled, will cause KVM to exit to userspace
-with KVM_EXIT_HYPERCALL exit reason to process some hypercalls.
-
-Calling KVM_CHECK_EXTENSION for this capability will return a bitmask
-of hypercalls that can be configured to exit to userspace.
-Right now, the only such hypercall is KVM_HC_MAP_GPA_RANGE.
-
-The argument to KVM_ENABLE_CAP is also a bitmask, and must be a subset
-of the result of KVM_CHECK_EXTENSION. KVM will forward to userspace
-the hypercalls whose corresponding bit is in the argument, and return
-ENOSYS for the others.
-
-8.35 KVM_CAP_PMU_CAPABILITY
----------------------------
-
-:Capability: KVM_CAP_PMU_CAPABILITY
-:Architectures: x86
-:Type: vm
-:Parameters: arg[0] is bitmask of PMU virtualization capabilities.
-:Returns: 0 on success, -EINVAL when arg[0] contains invalid bits
-
-This capability alters PMU virtualization in KVM.
-
-Calling KVM_CHECK_EXTENSION for this capability returns a bitmask of
-PMU virtualization capabilities that can be adjusted on a VM.
-
-The argument to KVM_ENABLE_CAP is also a bitmask and selects specific
-PMU virtualization capabilities to be applied to the VM. This can
-only be invoked on a VM prior to the creation of VCPUs.
-
-At this time, KVM_PMU_CAP_DISABLE is the only capability. Setting
-this capability will disable PMU virtualization for that VM. Usermode
-should adjust CPUID leaf 0xA to reflect that the PMU is disabled.
-
-8.36 KVM_CAP_ARM_SYSTEM_SUSPEND
--------------------------------
-
-:Capability: KVM_CAP_ARM_SYSTEM_SUSPEND
-:Architectures: arm64
-:Type: vm
-
-When enabled, KVM will exit to userspace with KVM_EXIT_SYSTEM_EVENT of
-type KVM_SYSTEM_EVENT_SUSPEND to process the guest suspend request.
-
8.37 KVM_CAP_S390_PROTECTED_DUMP
--------------------------------
-:Capability: KVM_CAP_S390_PROTECTED_DUMP
:Architectures: s390
:Type: vm
@@ -8953,27 +8918,9 @@ PV guests. The `KVM_PV_DUMP` command is available for the
dump related UV data. Also the vcpu ioctl `KVM_S390_PV_CPU_COMMAND` is
available and supports the `KVM_PV_DUMP_CPU` subcommand.
-8.38 KVM_CAP_VM_DISABLE_NX_HUGE_PAGES
--------------------------------------
-
-:Capability: KVM_CAP_VM_DISABLE_NX_HUGE_PAGES
-:Architectures: x86
-:Type: vm
-:Parameters: arg[0] must be 0.
-:Returns: 0 on success, -EPERM if the userspace process does not
- have CAP_SYS_BOOT, -EINVAL if args[0] is not 0 or any vCPUs have been
- created.
-
-This capability disables the NX huge pages mitigation for iTLB MULTIHIT.
-
-The capability has no effect if the nx_huge_pages module parameter is not set.
-
-This capability may only be set before any vCPUs are created.
-
8.39 KVM_CAP_S390_CPU_TOPOLOGY
------------------------------
-:Capability: KVM_CAP_S390_CPU_TOPOLOGY
:Architectures: s390
:Type: vm
@@ -8995,37 +8942,9 @@ structure.
When getting the Modified Change Topology Report value, the attr->addr
must point to a byte where the value will be stored or retrieved from.
-8.40 KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE
----------------------------------------
-
-:Capability: KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE
-:Architectures: arm64
-:Type: vm
-:Parameters: arg[0] is the new split chunk size.
-:Returns: 0 on success, -EINVAL if any memslot was already created.
-
-This capability sets the chunk size used in Eager Page Splitting.
-
-Eager Page Splitting improves the performance of dirty-logging (used
-in live migrations) when guest memory is backed by huge-pages. It
-avoids splitting huge-pages (into PAGE_SIZE pages) on fault, by doing
-it eagerly when enabling dirty logging (with the
-KVM_MEM_LOG_DIRTY_PAGES flag for a memory region), or when using
-KVM_CLEAR_DIRTY_LOG.
-
-The chunk size specifies how many pages to break at a time, using a
-single allocation for each chunk. Bigger the chunk size, more pages
-need to be allocated ahead of time.
-
-The chunk size needs to be a valid block size. The list of acceptable
-block sizes is exposed in KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES as a
-64-bit bitmap (each bit describing a block size). The default value is
-0, to disable the eager page splitting.
-
8.41 KVM_CAP_VM_TYPES
---------------------
-:Capability: KVM_CAP_MEMORY_ATTRIBUTES
:Architectures: x86
:Type: system ioctl
@@ -9042,6 +8961,67 @@ Do not use KVM_X86_SW_PROTECTED_VM for "real" VMs, and especially not in
production. The behavior and effective ABI for software-protected VMs is
unstable.
+8.42 KVM_CAP_PPC_RPT_INVALIDATE
+-------------------------------
+
+:Architectures: ppc
+
+This capability indicates that the kernel is capable of handling
+H_RPT_INVALIDATE hcall.
+
+In order to enable the use of H_RPT_INVALIDATE in the guest,
+user space might have to advertise it for the guest. For example,
+IBM pSeries (sPAPR) guest starts using it if "hcall-rpt-invalidate" is
+present in the "ibm,hypertas-functions" device-tree property.
+
+This capability is enabled for hypervisors on platforms like POWER9
+that support radix MMU.
+
+8.43 KVM_CAP_PPC_AIL_MODE_3
+---------------------------
+
+:Architectures: ppc
+
+This capability indicates that the kernel supports the mode 3 setting for the
+"Address Translation Mode on Interrupt" aka "Alternate Interrupt Location"
+resource that is controlled with the H_SET_MODE hypercall.
+
+This capability allows a guest kernel to use a better-performance mode for
+handling interrupts and system calls.
+
+8.44 KVM_CAP_MEMORY_FAULT_INFO
+------------------------------
+
+:Architectures: x86
+
+The presence of this capability indicates that KVM_RUN will fill
+kvm_run.memory_fault if KVM cannot resolve a guest page fault VM-Exit, e.g. if
+there is a valid memslot but no backing VMA for the corresponding host virtual
+address.
+
+The information in kvm_run.memory_fault is valid if and only if KVM_RUN returns
+an error with errno=EFAULT or errno=EHWPOISON *and* kvm_run.exit_reason is set
+to KVM_EXIT_MEMORY_FAULT.
+
+Note: Userspaces which attempt to resolve memory faults so that they can retry
+KVM_RUN are encouraged to guard against repeatedly receiving the same
+error/annotated fault.
+
+See KVM_EXIT_MEMORY_FAULT for more information.
+
+8.45 KVM_CAP_X86_GUEST_MODE
+---------------------------
+
+:Architectures: x86
+
+The presence of this capability indicates that KVM_RUN will update the
+KVM_RUN_X86_GUEST_MODE bit in kvm_run.flags to indicate whether the
+vCPU was executing nested guest code when it exited.
+
+KVM exits with the register state of either the L1 or L2 guest
+depending on which executed at the time of an exit. Userspace must
+take care to differentiate between these cases.
+
9. Known KVM API problems
=========================
@@ -9072,9 +9052,10 @@ the local APIC.
The same is true for the ``KVM_FEATURE_PV_UNHALT`` paravirtualized feature.
-CPU[EAX=1]:ECX[24] (TSC_DEADLINE) is not reported by ``KVM_GET_SUPPORTED_CPUID``.
-It can be enabled if ``KVM_CAP_TSC_DEADLINE_TIMER`` is present and the kernel
-has enabled in-kernel emulation of the local APIC.
+On older versions of Linux, CPU[EAX=1]:ECX[24] (TSC_DEADLINE) is not reported by
+``KVM_GET_SUPPORTED_CPUID``, but it can be enabled if ``KVM_CAP_TSC_DEADLINE_TIMER``
+is present and the kernel has enabled in-kernel emulation of the local APIC.
+On newer versions, ``KVM_GET_SUPPORTED_CPUID`` does report the bit as available.
CPU topology
~~~~~~~~~~~~
diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst
index c56d5f26c750..ae8bce7fecbe 100644
--- a/Documentation/virt/kvm/locking.rst
+++ b/Documentation/virt/kvm/locking.rst
@@ -196,7 +196,7 @@ writable between reading spte and updating spte. Like below case:
The Dirty bit is lost in this case.
In order to avoid this kind of issue, we always treat the spte as "volatile"
-if it can be updated out of mmu-lock [see spte_has_volatile_bits()]; it means
+if it can be updated out of mmu-lock [see spte_needs_atomic_update()]; it means
the spte is always atomically updated in this case.
3) flush tlbs due to spte updated
@@ -212,7 +212,7 @@ function to update spte (present -> present).
Since the spte is "volatile" if it can be updated out of mmu-lock, we always
atomically update the spte and the race caused by fast page fault can be avoided.
-See the comments in spte_has_volatile_bits() and mmu_spte_update().
+See the comments in spte_needs_atomic_update() and mmu_spte_update().
Lockless Access Tracking:
diff --git a/MAINTAINERS b/MAINTAINERS
index ed7aa6867674..c9763412a508 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -124,6 +124,7 @@ F: include/net/ieee80211_radiotap.h
F: include/net/iw_handler.h
F: include/net/wext.h
F: include/uapi/linux/nl80211.h
+N: include/uapi/linux/nl80211-.*
F: include/uapi/linux/wireless.h
F: net/wireless/
@@ -514,7 +515,7 @@ F: drivers/hwmon/adm1029.c
ADM8211 WIRELESS DRIVER
L: linux-wireless@vger.kernel.org
S: Orphan
-F: drivers/net/wireless/admtek/adm8211.*
+F: drivers/net/wireless/admtek/
ADP1050 HARDWARE MONITOR DRIVER
M: Radu Sabau <radu.sabau@analog.com>
@@ -6207,7 +6208,7 @@ F: Documentation/process/cve.rst
CW1200 WLAN driver
S: Orphan
-F: drivers/net/wireless/st/cw1200/
+F: drivers/net/wireless/st/
F: include/linux/platform_data/net-cw1200.h
CX18 VIDEO4LINUX DRIVER
@@ -9828,7 +9829,6 @@ S: Maintained
F: drivers/media/usb/go7007/
GOODIX TOUCHSCREEN
-M: Bastien Nocera <hadess@hadess.net>
M: Hans de Goede <hdegoede@redhat.com>
L: linux-input@vger.kernel.org
S: Maintained
@@ -11140,7 +11140,7 @@ S: Maintained
F: drivers/i2c/busses/i2c-icy.c
IDEAPAD LAPTOP EXTRAS DRIVER
-M: Ike Panhc <ike.pan@canonical.com>
+M: Ike Panhc <ikepanhc@gmail.com>
L: platform-driver-x86@vger.kernel.org
S: Maintained
W: http://launchpad.net/ideapad-laptop
@@ -13994,6 +13994,7 @@ MARVELL LIBERTAS WIRELESS DRIVER
L: libertas-dev@lists.infradead.org
S: Orphan
F: drivers/net/wireless/marvell/libertas/
+F: drivers/net/wireless/marvell/libertas_tf/
MARVELL MACCHIATOBIN SUPPORT
M: Russell King <linux@armlinux.org.uk>
@@ -15663,7 +15664,7 @@ M: Ajay Singh <ajay.kathat@microchip.com>
M: Claudiu Beznea <claudiu.beznea@tuxon.dev>
L: linux-wireless@vger.kernel.org
S: Supported
-F: drivers/net/wireless/microchip/wilc1000/
+F: drivers/net/wireless/microchip/
MICROSEMI MIPS SOCS
M: Alexandre Belloni <alexandre.belloni@bootlin.com>
@@ -16449,6 +16450,23 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless-next.git
F: Documentation/devicetree/bindings/net/wireless/
F: drivers/net/wireless/
+X: drivers/net/wireless/ath/
+X: drivers/net/wireless/broadcom/
+X: drivers/net/wireless/intel/
+X: drivers/net/wireless/intersil/
+X: drivers/net/wireless/marvell/
+X: drivers/net/wireless/mediatek/mt76/
+X: drivers/net/wireless/mediatek/mt7601u/
+X: drivers/net/wireless/microchip/
+X: drivers/net/wireless/purelifi/
+X: drivers/net/wireless/quantenna/
+X: drivers/net/wireless/ralink/
+X: drivers/net/wireless/realtek/
+X: drivers/net/wireless/rsi/
+X: drivers/net/wireless/silabs/
+X: drivers/net/wireless/st/
+X: drivers/net/wireless/ti/
+X: drivers/net/wireless/zydas/
NETWORKING [DSA]
M: Andrew Lunn <andrew@lunn.ch>
@@ -17833,7 +17851,7 @@ M: Christian Lamparter <chunkeey@googlemail.com>
L: linux-wireless@vger.kernel.org
S: Maintained
W: https://wireless.wiki.kernel.org/en/users/Drivers/p54
-F: drivers/net/wireless/intersil/p54/
+F: drivers/net/wireless/intersil/
PACKET SOCKETS
M: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
@@ -19110,7 +19128,7 @@ PURELIFI PLFXLC DRIVER
M: Srinivasan Raju <srini.raju@purelifi.com>
L: linux-wireless@vger.kernel.org
S: Supported
-F: drivers/net/wireless/purelifi/plfxlc/
+F: drivers/net/wireless/purelifi/
PVRUSB2 VIDEO4LINUX DRIVER
M: Mike Isely <isely@pobox.com>
@@ -19661,7 +19679,7 @@ M: Igor Mitsyanko <imitsyanko@quantenna.com>
R: Sergey Matyukevich <geomatsi@gmail.com>
L: linux-wireless@vger.kernel.org
S: Maintained
-F: drivers/net/wireless/quantenna
+F: drivers/net/wireless/quantenna/
RADEON and AMDGPU DRM DRIVERS
M: Alex Deucher <alexander.deucher@amd.com>
@@ -19741,7 +19759,7 @@ RALINK RT2X00 WIRELESS LAN DRIVER
M: Stanislaw Gruszka <stf_xl@wp.pl>
L: linux-wireless@vger.kernel.org
S: Maintained
-F: drivers/net/wireless/ralink/rt2x00/
+F: drivers/net/wireless/ralink/
RAMDISK RAM BLOCK DEVICE DRIVER
M: Jens Axboe <axboe@kernel.dk>
@@ -21089,6 +21107,7 @@ F: include/linux/clk/samsung.h
SAMSUNG SPI DRIVERS
M: Andi Shyti <andi.shyti@kernel.org>
+R: Tudor Ambarus <tudor.ambarus@linaro.org>
L: linux-spi@vger.kernel.org
L: linux-samsung-soc@vger.kernel.org
S: Maintained
@@ -21499,7 +21518,6 @@ F: include/linux/slimbus.h
SFC NETWORK DRIVER
M: Edward Cree <ecree.xilinx@gmail.com>
-M: Martin Habets <habetsm.xilinx@gmail.com>
L: netdev@vger.kernel.org
L: linux-net-drivers@amd.com
S: Maintained
@@ -21708,7 +21726,7 @@ SILICON LABS WIRELESS DRIVERS (for WFxxx series)
M: Jérôme Pouiller <jerome.pouiller@silabs.com>
S: Supported
F: Documentation/devicetree/bindings/net/wireless/silabs,wfx.yaml
-F: drivers/net/wireless/silabs/wfx/
+F: drivers/net/wireless/silabs/
SILICON MOTION SM712 FRAME BUFFER DRIVER
M: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
@@ -26208,7 +26226,7 @@ F: mm/zbud.c
ZD1211RW WIRELESS DRIVER
L: linux-wireless@vger.kernel.org
S: Orphan
-F: drivers/net/wireless/zydas/zd1211rw/
+F: drivers/net/wireless/zydas/
ZD1301 MEDIA DRIVER
L: linux-media@vger.kernel.org
diff --git a/Makefile b/Makefile
index 1d6a9ec8a2ac..50694bbbf828 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 6
PATCHLEVEL = 14
SUBLEVEL = 0
-EXTRAVERSION = -rc6
+EXTRAVERSION = -rc7
NAME = Baby Opossum Posse
# *DOCUMENTATION*
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index a73fe3671e3d..e98cfe7855a6 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -1375,8 +1375,6 @@ static inline bool kvm_system_needs_idmapped_vectors(void)
return cpus_have_final_cap(ARM64_SPECTRE_V3A);
}
-static inline void kvm_arch_sync_events(struct kvm *kvm) {}
-
void kvm_init_host_debug_data(void);
void kvm_vcpu_load_debug(struct kvm_vcpu *vcpu);
void kvm_vcpu_put_debug(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index bc94e036a26b..8104aee4f9a0 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -396,33 +396,35 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
#define __flush_tlb_range_op(op, start, pages, stride, \
asid, tlb_level, tlbi_user, lpa2) \
do { \
+ typeof(start) __flush_start = start; \
+ typeof(pages) __flush_pages = pages; \
int num = 0; \
int scale = 3; \
int shift = lpa2 ? 16 : PAGE_SHIFT; \
unsigned long addr; \
\
- while (pages > 0) { \
+ while (__flush_pages > 0) { \
if (!system_supports_tlb_range() || \
- pages == 1 || \
- (lpa2 && start != ALIGN(start, SZ_64K))) { \
- addr = __TLBI_VADDR(start, asid); \
+ __flush_pages == 1 || \
+ (lpa2 && __flush_start != ALIGN(__flush_start, SZ_64K))) { \
+ addr = __TLBI_VADDR(__flush_start, asid); \
__tlbi_level(op, addr, tlb_level); \
if (tlbi_user) \
__tlbi_user_level(op, addr, tlb_level); \
- start += stride; \
- pages -= stride >> PAGE_SHIFT; \
+ __flush_start += stride; \
+ __flush_pages -= stride >> PAGE_SHIFT; \
continue; \
} \
\
- num = __TLBI_RANGE_NUM(pages, scale); \
+ num = __TLBI_RANGE_NUM(__flush_pages, scale); \
if (num >= 0) { \
- addr = __TLBI_VADDR_RANGE(start >> shift, asid, \
+ addr = __TLBI_VADDR_RANGE(__flush_start >> shift, asid, \
scale, num, tlb_level); \
__tlbi(r##op, addr); \
if (tlbi_user) \
__tlbi_user(r##op, addr); \
- start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \
- pages -= __TLBI_RANGE_PAGES(num, scale); \
+ __flush_start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \
+ __flush_pages -= __TLBI_RANGE_PAGES(num, scale);\
} \
scale--; \
} \
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index b4df5bc5b1b8..1dfe1a8efdbe 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1177,8 +1177,11 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap)
{
WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END));
+ /* [start, end] should be within one section */
+ WARN_ON_ONCE(end - start > PAGES_PER_SECTION * sizeof(struct page));
- if (!IS_ENABLED(CONFIG_ARM64_4K_PAGES))
+ if (!IS_ENABLED(CONFIG_ARM64_4K_PAGES) ||
+ (end - start < PAGES_PER_SECTION * sizeof(struct page)))
return vmemmap_populate_basepages(start, end, node, altmap);
else
return vmemmap_populate_hugepages(start, end, node, altmap);
diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
index 590982cd986e..f457c2662e2f 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -12,6 +12,7 @@
#include <linux/kvm.h>
#include <linux/kvm_types.h>
#include <linux/mutex.h>
+#include <linux/perf_event.h>
#include <linux/spinlock.h>
#include <linux/threads.h>
#include <linux/types.h>
@@ -176,6 +177,9 @@ struct kvm_vcpu_arch {
/* Pointers stored here for easy accessing from assembly code */
int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu);
+ /* GPA (=HVA) of PGD for secondary mmu */
+ unsigned long kvm_pgd;
+
/* Host registers preserved across guest mode execution */
unsigned long host_sp;
unsigned long host_tp;
@@ -289,6 +293,8 @@ static inline int kvm_get_pmu_num(struct kvm_vcpu_arch *arch)
return (arch->cpucfg[6] & CPUCFG6_PMNUM) >> CPUCFG6_PMNUM_SHIFT;
}
+bool kvm_arch_pmi_in_guest(struct kvm_vcpu *vcpu);
+
/* Debug: dump vcpu state */
int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu);
@@ -320,7 +326,6 @@ static inline bool kvm_is_ifetch_fault(struct kvm_vcpu_arch *arch)
/* Misc */
static inline void kvm_arch_hardware_unsetup(void) {}
-static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c
index 8be1c38ad8eb..c19f446ad0db 100644
--- a/arch/loongarch/kernel/asm-offsets.c
+++ b/arch/loongarch/kernel/asm-offsets.c
@@ -296,6 +296,7 @@ static void __used output_kvm_defines(void)
OFFSET(KVM_ARCH_HSP, kvm_vcpu_arch, host_sp);
OFFSET(KVM_ARCH_HTP, kvm_vcpu_arch, host_tp);
OFFSET(KVM_ARCH_HPGD, kvm_vcpu_arch, host_pgd);
+ OFFSET(KVM_ARCH_KVMPGD, kvm_vcpu_arch, kvm_pgd);
OFFSET(KVM_ARCH_HANDLE_EXIT, kvm_vcpu_arch, handle_exit);
OFFSET(KVM_ARCH_HEENTRY, kvm_vcpu_arch, host_eentry);
OFFSET(KVM_ARCH_GEENTRY, kvm_vcpu_arch, guest_eentry);
diff --git a/arch/loongarch/kvm/Kconfig b/arch/loongarch/kvm/Kconfig
index 97a811077ac3..40eea6da7c25 100644
--- a/arch/loongarch/kvm/Kconfig
+++ b/arch/loongarch/kvm/Kconfig
@@ -33,6 +33,7 @@ config KVM
select KVM_MMIO
select KVM_XFER_TO_GUEST_WORK
select SCHED_INFO
+ select GUEST_PERF_EVENTS if PERF_EVENTS
help
Support hosting virtualized guest machines using
hardware virtualization extensions. You will need
diff --git a/arch/loongarch/kvm/Makefile b/arch/loongarch/kvm/Makefile
index 3a01292f71cc..f4c8e35c216a 100644
--- a/arch/loongarch/kvm/Makefile
+++ b/arch/loongarch/kvm/Makefile
@@ -3,8 +3,6 @@
# Makefile for LoongArch KVM support
#
-ccflags-y += -I $(src)
-
include $(srctree)/virt/kvm/Makefile.kvm
obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c
index b6864d6e5ec8..d165cd38c6bb 100644
--- a/arch/loongarch/kvm/main.c
+++ b/arch/loongarch/kvm/main.c
@@ -394,6 +394,7 @@ static int kvm_loongarch_env_init(void)
}
kvm_init_gcsr_flag();
+ kvm_register_perf_callbacks(NULL);
/* Register LoongArch IPI interrupt controller interface. */
ret = kvm_loongarch_register_ipi_device();
@@ -425,6 +426,8 @@ static void kvm_loongarch_env_exit(void)
}
kfree(kvm_loongarch_ops);
}
+
+ kvm_unregister_perf_callbacks();
}
static int kvm_loongarch_init(void)
diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S
index 1be185e94807..f1768b7a6194 100644
--- a/arch/loongarch/kvm/switch.S
+++ b/arch/loongarch/kvm/switch.S
@@ -60,16 +60,8 @@
ld.d t0, a2, KVM_ARCH_GPC
csrwr t0, LOONGARCH_CSR_ERA
- /* Save host PGDL */
- csrrd t0, LOONGARCH_CSR_PGDL
- st.d t0, a2, KVM_ARCH_HPGD
-
- /* Switch to kvm */
- ld.d t1, a2, KVM_VCPU_KVM - KVM_VCPU_ARCH
-
- /* Load guest PGDL */
- li.w t0, KVM_GPGD
- ldx.d t0, t1, t0
+ /* Load PGD for KVM hypervisor */
+ ld.d t0, a2, KVM_ARCH_KVMPGD
csrwr t0, LOONGARCH_CSR_PGDL
/* Mix GID and RID */
diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
index 9e1a9b4aa4c6..552cde722932 100644
--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -361,6 +361,34 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
+ unsigned long val;
+
+ preempt_disable();
+ val = gcsr_read(LOONGARCH_CSR_CRMD);
+ preempt_enable();
+
+ return (val & CSR_PRMD_PPLV) == PLV_KERN;
+}
+
+#ifdef CONFIG_GUEST_PERF_EVENTS
+unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.pc;
+}
+
+/*
+ * Returns true if a Performance Monitoring Interrupt (PMI), a.k.a. perf event,
+ * arrived in guest context. For LoongArch64, if PMU is not passthrough to VM,
+ * any event that arrives while a vCPU is loaded is considered to be "in guest".
+ */
+bool kvm_arch_pmi_in_guest(struct kvm_vcpu *vcpu)
+{
+ return (vcpu && !(vcpu->arch.aux_inuse & KVM_LARCH_PMU));
+}
+#endif
+
+bool kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu)
+{
return false;
}
@@ -1462,6 +1490,15 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
hrtimer_init(&vcpu->arch.swtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
vcpu->arch.swtimer.function = kvm_swtimer_wakeup;
+ /* Get GPA (=HVA) of PGD for kvm hypervisor */
+ vcpu->arch.kvm_pgd = __pa(vcpu->kvm->arch.pgd);
+
+ /*
+ * Get PGD for primary mmu, virtual address is used since there is
+ * memory access after loading from CSR_PGD in tlb exception fast path.
+ */
+ vcpu->arch.host_pgd = (unsigned long)vcpu->kvm->mm->pgd;
+
vcpu->arch.handle_exit = kvm_handle_exit;
vcpu->arch.guest_eentry = (unsigned long)kvm_loongarch_ops->exc_entry;
vcpu->arch.csr = kzalloc(sizeof(struct loongarch_csrs), GFP_KERNEL);
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index f7222eb594ea..c14b10821817 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -886,7 +886,6 @@ extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm);
extern int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
struct kvm_mips_interrupt *irq);
-static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_free_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot) {}
static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 6e1108f8fce6..2d139c807577 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -902,7 +902,6 @@ struct kvm_vcpu_arch {
#define __KVM_HAVE_ARCH_WQP
#define __KVM_HAVE_CREATE_DEVICE
-static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs
index 1916cf7ba450..17606940bb52 100644
--- a/arch/riscv/Kconfig.socs
+++ b/arch/riscv/Kconfig.socs
@@ -26,6 +26,7 @@ config ARCH_SOPHGO
config ARCH_SPACEMIT
bool "SpacemiT SoCs"
+ select PINCTRL
help
This enables support for SpacemiT SoC platform hardware.
diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
index cc33e35cd628..0e9c2fab6378 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -301,8 +301,6 @@ static inline bool kvm_arch_pmi_in_guest(struct kvm_vcpu *vcpu)
return IS_ENABLED(CONFIG_GUEST_PERF_EVENTS) && !!vcpu;
}
-static inline void kvm_arch_sync_events(struct kvm *kvm) {}
-
#define KVM_RISCV_GSTAGE_TLB_MIN_ORDER 12
void kvm_riscv_local_hfence_gvma_vmid_gpa(unsigned long vmid,
diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c
index 1fa8be5ee509..4b24705dc63a 100644
--- a/arch/riscv/kvm/main.c
+++ b/arch/riscv/kvm/main.c
@@ -172,8 +172,8 @@ module_init(riscv_kvm_init);
static void __exit riscv_kvm_exit(void)
{
- kvm_riscv_teardown();
-
kvm_exit();
+
+ kvm_riscv_teardown();
}
module_exit(riscv_kvm_exit);
diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c
index f6d27b59c641..43ee8e33ba23 100644
--- a/arch/riscv/kvm/vcpu_onereg.c
+++ b/arch/riscv/kvm/vcpu_onereg.c
@@ -203,7 +203,7 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
case KVM_RISCV_ISA_EXT_SVADE:
/*
* The henvcfg.ADUE is read-only zero if menvcfg.ADUE is zero.
- * Svade is not allowed to disable when the platform use Svade.
+ * Svade can't be disabled unless we support Svadu.
*/
return arch_has_hw_pte_young();
default:
diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c
index 2707a51b082c..78ac3216a54d 100644
--- a/arch/riscv/kvm/vcpu_pmu.c
+++ b/arch/riscv/kvm/vcpu_pmu.c
@@ -666,6 +666,7 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba
.type = etype,
.size = sizeof(struct perf_event_attr),
.pinned = true,
+ .disabled = true,
/*
* It should never reach here if the platform doesn't support the sscofpmf
* extension as mode filtering won't work without it.
diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h
index 4e73ef46d4b2..9f2814d0e1e9 100644
--- a/arch/s390/include/asm/gmap.h
+++ b/arch/s390/include/asm/gmap.h
@@ -139,7 +139,6 @@ int s390_replace_asce(struct gmap *gmap);
void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns);
int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
unsigned long end, bool interruptible);
-int kvm_s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split);
unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level);
/**
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 9a367866cab0..424f899d8163 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -1056,7 +1056,6 @@ bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu);
extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc);
extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc);
-static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_free_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot) {}
static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index b11f5b6d0bd1..46fb0ef6f984 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -631,7 +631,7 @@ int uv_pin_shared(unsigned long paddr);
int uv_destroy_folio(struct folio *folio);
int uv_destroy_pte(pte_t pte);
int uv_convert_from_secure_pte(pte_t pte);
-int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb);
+int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header *uvcb);
int uv_convert_from_secure(unsigned long paddr);
int uv_convert_from_secure_folio(struct folio *folio);
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index 9f05df2da2f7..9a5d5be8acf4 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -206,6 +206,39 @@ int uv_convert_from_secure_pte(pte_t pte)
return uv_convert_from_secure_folio(pfn_folio(pte_pfn(pte)));
}
+/**
+ * should_export_before_import - Determine whether an export is needed
+ * before an import-like operation
+ * @uvcb: the Ultravisor control block of the UVC to be performed
+ * @mm: the mm of the process
+ *
+ * Returns whether an export is needed before every import-like operation.
+ * This is needed for shared pages, which don't trigger a secure storage
+ * exception when accessed from a different guest.
+ *
+ * Although considered as one, the Unpin Page UVC is not an actual import,
+ * so it is not affected.
+ *
+ * No export is needed also when there is only one protected VM, because the
+ * page cannot belong to the wrong VM in that case (there is no "other VM"
+ * it can belong to).
+ *
+ * Return: true if an export is needed before every import, otherwise false.
+ */
+static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
+{
+ /*
+ * The misc feature indicates, among other things, that importing a
+ * shared page from a different protected VM will automatically also
+ * transfer its ownership.
+ */
+ if (uv_has_feature(BIT_UV_FEAT_MISC))
+ return false;
+ if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
+ return false;
+ return atomic_read(&mm->context.protected_count) > 1;
+}
+
/*
* Calculate the expected ref_count for a folio that would otherwise have no
* further pins. This was cribbed from similar functions in other places in
@@ -228,7 +261,7 @@ static int expected_folio_refs(struct folio *folio)
}
/**
- * make_folio_secure() - make a folio secure
+ * __make_folio_secure() - make a folio secure
* @folio: the folio to make secure
* @uvcb: the uvcb that describes the UVC to be used
*
@@ -237,20 +270,18 @@ static int expected_folio_refs(struct folio *folio)
*
* Return: 0 on success;
* -EBUSY if the folio is in writeback or has too many references;
- * -E2BIG if the folio is large;
* -EAGAIN if the UVC needs to be attempted again;
* -ENXIO if the address is not mapped;
* -EINVAL if the UVC failed for other reasons.
*
* Context: The caller must hold exactly one extra reference on the folio
- * (it's the same logic as split_folio())
+ * (it's the same logic as split_folio()), and the folio must be
+ * locked.
*/
-int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
+static int __make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
{
int expected, cc = 0;
- if (folio_test_large(folio))
- return -E2BIG;
if (folio_test_writeback(folio))
return -EBUSY;
expected = expected_folio_refs(folio) + 1;
@@ -277,7 +308,98 @@ int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
return -EAGAIN;
return uvcb->rc == 0x10a ? -ENXIO : -EINVAL;
}
-EXPORT_SYMBOL_GPL(make_folio_secure);
+
+static int make_folio_secure(struct mm_struct *mm, struct folio *folio, struct uv_cb_header *uvcb)
+{
+ int rc;
+
+ if (!folio_trylock(folio))
+ return -EAGAIN;
+ if (should_export_before_import(uvcb, mm))
+ uv_convert_from_secure(folio_to_phys(folio));
+ rc = __make_folio_secure(folio, uvcb);
+ folio_unlock(folio);
+
+ return rc;
+}
+
+/**
+ * s390_wiggle_split_folio() - try to drain extra references to a folio and optionally split.
+ * @mm: the mm containing the folio to work on
+ * @folio: the folio
+ * @split: whether to split a large folio
+ *
+ * Context: Must be called while holding an extra reference to the folio;
+ * the mm lock should not be held.
+ * Return: 0 if the folio was split successfully;
+ * -EAGAIN if the folio was not split successfully but another attempt
+ * can be made, or if @split was set to false;
+ * -EINVAL in case of other errors. See split_folio().
+ */
+static int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split)
+{
+ int rc;
+
+ lockdep_assert_not_held(&mm->mmap_lock);
+ folio_wait_writeback(folio);
+ lru_add_drain_all();
+ if (split) {
+ folio_lock(folio);
+ rc = split_folio(folio);
+ folio_unlock(folio);
+
+ if (rc != -EBUSY)
+ return rc;
+ }
+ return -EAGAIN;
+}
+
+int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header *uvcb)
+{
+ struct vm_area_struct *vma;
+ struct folio_walk fw;
+ struct folio *folio;
+ int rc;
+
+ mmap_read_lock(mm);
+ vma = vma_lookup(mm, hva);
+ if (!vma) {
+ mmap_read_unlock(mm);
+ return -EFAULT;
+ }
+ folio = folio_walk_start(&fw, vma, hva, 0);
+ if (!folio) {
+ mmap_read_unlock(mm);
+ return -ENXIO;
+ }
+
+ folio_get(folio);
+ /*
+ * Secure pages cannot be huge and userspace should not combine both.
+ * In case userspace does it anyway this will result in an -EFAULT for
+ * the unpack. The guest is thus never reaching secure mode.
+ * If userspace plays dirty tricks and decides to map huge pages at a
+ * later point in time, it will receive a segmentation fault or
+ * KVM_RUN will return -EFAULT.
+ */
+ if (folio_test_hugetlb(folio))
+ rc = -EFAULT;
+ else if (folio_test_large(folio))
+ rc = -E2BIG;
+ else if (!pte_write(fw.pte) || (pte_val(fw.pte) & _PAGE_INVALID))
+ rc = -ENXIO;
+ else
+ rc = make_folio_secure(mm, folio, uvcb);
+ folio_walk_end(&fw, vma);
+ mmap_read_unlock(mm);
+
+ if (rc == -E2BIG || rc == -EBUSY)
+ rc = s390_wiggle_split_folio(mm, folio, rc == -E2BIG);
+ folio_put(folio);
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(make_hva_secure);
/*
* To be called with the folio locked or with an extra reference! This will
diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c
index 02adf151d4de..6d8944d1b4a0 100644
--- a/arch/s390/kvm/gmap.c
+++ b/arch/s390/kvm/gmap.c
@@ -23,116 +23,25 @@
#include "gmap.h"
/**
- * should_export_before_import - Determine whether an export is needed
- * before an import-like operation
- * @uvcb: the Ultravisor control block of the UVC to be performed
- * @mm: the mm of the process
- *
- * Returns whether an export is needed before every import-like operation.
- * This is needed for shared pages, which don't trigger a secure storage
- * exception when accessed from a different guest.
- *
- * Although considered as one, the Unpin Page UVC is not an actual import,
- * so it is not affected.
- *
- * No export is needed also when there is only one protected VM, because the
- * page cannot belong to the wrong VM in that case (there is no "other VM"
- * it can belong to).
- *
- * Return: true if an export is needed before every import, otherwise false.
- */
-static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
-{
- /*
- * The misc feature indicates, among other things, that importing a
- * shared page from a different protected VM will automatically also
- * transfer its ownership.
- */
- if (uv_has_feature(BIT_UV_FEAT_MISC))
- return false;
- if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
- return false;
- return atomic_read(&mm->context.protected_count) > 1;
-}
-
-static int __gmap_make_secure(struct gmap *gmap, struct page *page, void *uvcb)
-{
- struct folio *folio = page_folio(page);
- int rc;
-
- /*
- * Secure pages cannot be huge and userspace should not combine both.
- * In case userspace does it anyway this will result in an -EFAULT for
- * the unpack. The guest is thus never reaching secure mode.
- * If userspace plays dirty tricks and decides to map huge pages at a
- * later point in time, it will receive a segmentation fault or
- * KVM_RUN will return -EFAULT.
- */
- if (folio_test_hugetlb(folio))
- return -EFAULT;
- if (folio_test_large(folio)) {
- mmap_read_unlock(gmap->mm);
- rc = kvm_s390_wiggle_split_folio(gmap->mm, folio, true);
- mmap_read_lock(gmap->mm);
- if (rc)
- return rc;
- folio = page_folio(page);
- }
-
- if (!folio_trylock(folio))
- return -EAGAIN;
- if (should_export_before_import(uvcb, gmap->mm))
- uv_convert_from_secure(folio_to_phys(folio));
- rc = make_folio_secure(folio, uvcb);
- folio_unlock(folio);
-
- /*
- * In theory a race is possible and the folio might have become
- * large again before the folio_trylock() above. In that case, no
- * action is performed and -EAGAIN is returned; the callers will
- * have to try again later.
- * In most cases this implies running the VM again, getting the same
- * exception again, and make another attempt in this function.
- * This is expected to happen extremely rarely.
- */
- if (rc == -E2BIG)
- return -EAGAIN;
- /* The folio has too many references, try to shake some off */
- if (rc == -EBUSY) {
- mmap_read_unlock(gmap->mm);
- kvm_s390_wiggle_split_folio(gmap->mm, folio, false);
- mmap_read_lock(gmap->mm);
- return -EAGAIN;
- }
-
- return rc;
-}
-
-/**
* gmap_make_secure() - make one guest page secure
* @gmap: the guest gmap
* @gaddr: the guest address that needs to be made secure
* @uvcb: the UVCB specifying which operation needs to be performed
*
* Context: needs to be called with kvm->srcu held.
- * Return: 0 on success, < 0 in case of error (see __gmap_make_secure()).
+ * Return: 0 on success, < 0 in case of error.
*/
int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
{
struct kvm *kvm = gmap->private;
- struct page *page;
- int rc = 0;
+ unsigned long vmaddr;
lockdep_assert_held(&kvm->srcu);
- page = gfn_to_page(kvm, gpa_to_gfn(gaddr));
- mmap_read_lock(gmap->mm);
- if (page)
- rc = __gmap_make_secure(gmap, page, uvcb);
- kvm_release_page_clean(page);
- mmap_read_unlock(gmap->mm);
-
- return rc;
+ vmaddr = gfn_to_hva(kvm, gpa_to_gfn(gaddr));
+ if (kvm_is_error_hva(vmaddr))
+ return -EFAULT;
+ return make_hva_secure(gmap->mm, vmaddr, uvcb);
}
int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr)
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 610dd44a948b..a06a000f196c 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -95,7 +95,7 @@ static int handle_validity(struct kvm_vcpu *vcpu)
vcpu->stat.exit_validity++;
trace_kvm_s390_intercept_validity(vcpu, viwhy);
- KVM_EVENT(3, "validity intercept 0x%x for pid %u (kvm 0x%pK)", viwhy,
+ KVM_EVENT(3, "validity intercept 0x%x for pid %u (kvm 0x%p)", viwhy,
current->pid, vcpu->kvm);
/* do not warn on invalid runtime instrumentation mode */
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 07ff0e10cb7f..c0558f054007 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -3161,7 +3161,7 @@ void kvm_s390_gisa_clear(struct kvm *kvm)
if (!gi->origin)
return;
gisa_clear_ipm(gi->origin);
- VM_EVENT(kvm, 3, "gisa 0x%pK cleared", gi->origin);
+ VM_EVENT(kvm, 3, "gisa 0x%p cleared", gi->origin);
}
void kvm_s390_gisa_init(struct kvm *kvm)
@@ -3178,7 +3178,7 @@ void kvm_s390_gisa_init(struct kvm *kvm)
gi->timer.function = gisa_vcpu_kicker;
memset(gi->origin, 0, sizeof(struct kvm_s390_gisa));
gi->origin->next_alert = (u32)virt_to_phys(gi->origin);
- VM_EVENT(kvm, 3, "gisa 0x%pK initialized", gi->origin);
+ VM_EVENT(kvm, 3, "gisa 0x%p initialized", gi->origin);
}
void kvm_s390_gisa_enable(struct kvm *kvm)
@@ -3219,7 +3219,7 @@ void kvm_s390_gisa_destroy(struct kvm *kvm)
process_gib_alert_list();
hrtimer_cancel(&gi->timer);
gi->origin = NULL;
- VM_EVENT(kvm, 3, "gisa 0x%pK destroyed", gisa);
+ VM_EVENT(kvm, 3, "gisa 0x%p destroyed", gisa);
}
void kvm_s390_gisa_disable(struct kvm *kvm)
@@ -3468,7 +3468,7 @@ int __init kvm_s390_gib_init(u8 nisc)
}
}
- KVM_EVENT(3, "gib 0x%pK (nisc=%d) initialized", gib, gib->nisc);
+ KVM_EVENT(3, "gib 0x%p (nisc=%d) initialized", gib, gib->nisc);
goto out;
out_unreg_gal:
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index ebecb96bacce..8b8e2173fe82 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1020,7 +1020,7 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
}
mutex_unlock(&kvm->lock);
VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
- VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
+ VM_EVENT(kvm, 3, "New guest asce: 0x%p",
(void *) kvm->arch.gmap->asce);
break;
}
@@ -3464,7 +3464,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm_s390_gisa_init(kvm);
INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup);
kvm->arch.pv.set_aside = NULL;
- KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
+ KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);
return 0;
out_err:
@@ -3527,7 +3527,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_s390_destroy_adapters(kvm);
kvm_s390_clear_float_irqs(kvm);
kvm_s390_vsie_destroy(kvm);
- KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
+ KVM_EVENT(3, "vm 0x%p destroyed", kvm);
}
/* Section: vcpu related */
@@ -3648,7 +3648,7 @@ static int sca_switch_to_extended(struct kvm *kvm)
free_page((unsigned long)old_sca);
- VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
+ VM_EVENT(kvm, 2, "Switched to ESCA (0x%p -> 0x%p)",
old_sca, kvm->arch.sca);
return 0;
}
@@ -4025,7 +4025,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
goto out_free_sie_block;
}
- VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
+ VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%p, sie block at 0x%p",
vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
@@ -4952,6 +4952,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
{
unsigned int flags = 0;
unsigned long gaddr;
+ int rc;
gaddr = current->thread.gmap_teid.addr * PAGE_SIZE;
if (kvm_s390_cur_gmap_fault_is_write())
@@ -4961,16 +4962,6 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
case 0:
vcpu->stat.exit_null++;
break;
- case PGM_NON_SECURE_STORAGE_ACCESS:
- kvm_s390_assert_primary_as(vcpu);
- /*
- * This is normal operation; a page belonging to a protected
- * guest has not been imported yet. Try to import the page into
- * the protected guest.
- */
- if (gmap_convert_to_secure(vcpu->arch.gmap, gaddr) == -EINVAL)
- send_sig(SIGSEGV, current, 0);
- break;
case PGM_SECURE_STORAGE_ACCESS:
case PGM_SECURE_STORAGE_VIOLATION:
kvm_s390_assert_primary_as(vcpu);
@@ -4995,6 +4986,20 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
send_sig(SIGSEGV, current, 0);
}
break;
+ case PGM_NON_SECURE_STORAGE_ACCESS:
+ kvm_s390_assert_primary_as(vcpu);
+ /*
+ * This is normal operation; a page belonging to a protected
+ * guest has not been imported yet. Try to import the page into
+ * the protected guest.
+ */
+ rc = gmap_convert_to_secure(vcpu->arch.gmap, gaddr);
+ if (rc == -EINVAL)
+ send_sig(SIGSEGV, current, 0);
+ if (rc != -ENXIO)
+ break;
+ flags = FAULT_FLAG_WRITE;
+ fallthrough;
case PGM_PROTECTION:
case PGM_SEGMENT_TRANSLATION:
case PGM_PAGE_TRANSLATION:
diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h
index 9ac92dbf680d..9e28f165c114 100644
--- a/arch/s390/kvm/trace-s390.h
+++ b/arch/s390/kvm/trace-s390.h
@@ -56,7 +56,7 @@ TRACE_EVENT(kvm_s390_create_vcpu,
__entry->sie_block = sie_block;
),
- TP_printk("create cpu %d at 0x%pK, sie block at 0x%pK",
+ TP_printk("create cpu %d at 0x%p, sie block at 0x%p",
__entry->id, __entry->vcpu, __entry->sie_block)
);
@@ -255,7 +255,7 @@ TRACE_EVENT(kvm_s390_enable_css,
__entry->kvm = kvm;
),
- TP_printk("enabling channel I/O support (kvm @ %pK)\n",
+ TP_printk("enabling channel I/O support (kvm @ %p)\n",
__entry->kvm)
);
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 94d927785800..d14b488e7a1f 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -2626,31 +2626,3 @@ int s390_replace_asce(struct gmap *gmap)
return 0;
}
EXPORT_SYMBOL_GPL(s390_replace_asce);
-
-/**
- * kvm_s390_wiggle_split_folio() - try to drain extra references to a folio and optionally split
- * @mm: the mm containing the folio to work on
- * @folio: the folio
- * @split: whether to split a large folio
- *
- * Context: Must be called while holding an extra reference to the folio;
- * the mm lock should not be held.
- */
-int kvm_s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split)
-{
- int rc;
-
- lockdep_assert_not_held(&mm->mmap_lock);
- folio_wait_writeback(folio);
- lru_add_drain_all();
- if (split) {
- folio_lock(folio);
- rc = split_folio(folio);
- folio_unlock(folio);
-
- if (rc != -EBUSY)
- return rc;
- }
- return -EAGAIN;
-}
-EXPORT_SYMBOL_GPL(kvm_s390_wiggle_split_folio);
diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c
index 4e1b1e3b5658..3f4e20d7b724 100644
--- a/arch/x86/hyperv/hv_vtl.c
+++ b/arch/x86/hyperv/hv_vtl.c
@@ -30,6 +30,7 @@ void __init hv_vtl_init_platform(void)
x86_platform.realmode_init = x86_init_noop;
x86_init.irqs.pre_vector_init = x86_init_noop;
x86_init.timers.timer_init = x86_init_noop;
+ x86_init.resources.probe_roms = x86_init_noop;
/* Avoid searching for BIOS MP tables */
x86_init.mpparse.find_mptable = x86_init_noop;
diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index dd68d9ad9b22..ec7880271cf9 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -464,7 +464,6 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
enum hv_mem_host_visibility visibility)
{
struct hv_gpa_range_for_visibility *input;
- u16 pages_processed;
u64 hv_status;
unsigned long flags;
@@ -493,7 +492,7 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
memcpy((void *)input->gpa_page_list, pfn, count * sizeof(*pfn));
hv_status = hv_do_rep_hypercall(
HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY, count,
- 0, input, &pages_processed);
+ 0, input, NULL);
local_irq_restore(flags);
if (hv_result_success(hv_status))
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 508c0dad116b..8f8aaf94dc00 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -386,6 +386,7 @@
#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* "v_spec_ctrl" Virtual SPEC_CTRL */
#define X86_FEATURE_VNMI (15*32+25) /* "vnmi" Virtual NMI */
#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* SVME addr check */
+#define X86_FEATURE_IDLE_HLT (15*32+30) /* IDLE HLT intercept */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* "avx512vbmi" AVX512 Vector Bit Manipulation instructions*/
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 32ae3aa50c7e..3bdae454a959 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -27,6 +27,7 @@
#include <linux/kfifo.h>
#include <linux/sched/vhost_task.h>
#include <linux/call_once.h>
+#include <linux/atomic.h>
#include <asm/apic.h>
#include <asm/pvclock-abi.h>
@@ -405,7 +406,7 @@ union kvm_cpu_role {
};
struct kvm_rmap_head {
- unsigned long val;
+ atomic_long_t val;
};
struct kvm_pio_request {
@@ -880,6 +881,7 @@ struct kvm_vcpu_arch {
int cpuid_nent;
struct kvm_cpuid_entry2 *cpuid_entries;
+ bool cpuid_dynamic_bits_dirty;
bool is_amd_compatible;
/*
@@ -909,7 +911,8 @@ struct kvm_vcpu_arch {
int (*complete_userspace_io)(struct kvm_vcpu *vcpu);
gpa_t time;
- struct pvclock_vcpu_time_info hv_clock;
+ s8 pvclock_tsc_shift;
+ u32 pvclock_tsc_mul;
unsigned int hw_tsc_khz;
struct gfn_to_pfn_cache pv_time;
/* set guest stopped flag in pvclock flags field */
@@ -997,8 +1000,8 @@ struct kvm_vcpu_arch {
u64 msr_int_val; /* MSR_KVM_ASYNC_PF_INT */
u16 vec;
u32 id;
- bool send_user_only;
u32 host_apf_flags;
+ bool send_always;
bool delivery_as_pf_vmexit;
bool pageready_pending;
} apf;
@@ -1053,6 +1056,7 @@ struct kvm_vcpu_arch {
/* Protected Guests */
bool guest_state_protected;
+ bool guest_tsc_protected;
/*
* Set when PDPTS were loaded directly by the userspace without
@@ -1189,6 +1193,8 @@ struct kvm_xen {
struct gfn_to_pfn_cache shinfo_cache;
struct idr evtchn_ports;
unsigned long poll_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+
+ struct kvm_xen_hvm_config hvm_config;
};
#endif
@@ -1354,8 +1360,6 @@ struct kvm_arch {
u64 shadow_mmio_value;
- struct iommu_domain *iommu_domain;
- bool iommu_noncoherent;
#define __KVM_HAVE_ARCH_NONCOHERENT_DMA
atomic_t noncoherent_dma_count;
#define __KVM_HAVE_ARCH_ASSIGNED_DEVICE
@@ -1411,8 +1415,6 @@ struct kvm_arch {
struct delayed_work kvmclock_update_work;
struct delayed_work kvmclock_sync_work;
- struct kvm_xen_hvm_config xen_hvm_config;
-
/* reads protected by irq_srcu, writes by irq_lock */
struct hlist_head mask_notifier_list;
@@ -1470,8 +1472,13 @@ struct kvm_arch {
struct once nx_once;
#ifdef CONFIG_X86_64
- /* The number of TDP MMU pages across all roots. */
+#ifdef CONFIG_KVM_PROVE_MMU
+ /*
+ * The number of TDP MMU pages across all roots. Used only to sanity
+ * check that KVM isn't leaking TDP MMU pages.
+ */
atomic64_t tdp_mmu_pages;
+#endif
/*
* List of struct kvm_mmu_pages being used as roots.
@@ -1479,6 +1486,7 @@ struct kvm_arch {
* tdp_mmu_page set.
*
* For reads, this list is protected by:
+ * RCU alone or
* the MMU lock in read mode + RCU or
* the MMU lock in write mode
*
@@ -2164,8 +2172,8 @@ int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu);
void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long payload);
-void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr);
-void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
+void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned int nr,
+ bool has_error_code, u32 error_code);
void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault);
void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
struct x86_exception *fault);
diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
index dcbccdb280f9..acb85b9346d8 100644
--- a/arch/x86/include/asm/sev-common.h
+++ b/arch/x86/include/asm/sev-common.h
@@ -212,8 +212,16 @@ struct snp_psc_desc {
#define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK)
/*
- * Error codes related to GHCB input that can be communicated back to the guest
- * by setting the lower 32-bits of the GHCB SW_EXITINFO1 field to 2.
+ * GHCB-defined return codes that are communicated back to the guest via
+ * SW_EXITINFO1.
+ */
+#define GHCB_HV_RESP_NO_ACTION 0
+#define GHCB_HV_RESP_ISSUE_EXCEPTION 1
+#define GHCB_HV_RESP_MALFORMED_INPUT 2
+
+/*
+ * GHCB-defined sub-error codes for malformed input (see above) that are
+ * communicated back to the guest via SW_EXITINFO2[31:0].
*/
#define GHCB_ERR_NOT_REGISTERED 1
#define GHCB_ERR_INVALID_USAGE 2
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index e2fac21471f5..9b7fa99ae951 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -116,6 +116,7 @@ enum {
INTERCEPT_INVPCID,
INTERCEPT_MCOMMIT,
INTERCEPT_TLBSYNC,
+ INTERCEPT_IDLE_HLT = 166,
};
@@ -290,10 +291,6 @@ static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_
#define SVM_SEV_FEAT_ALTERNATE_INJECTION BIT(4)
#define SVM_SEV_FEAT_DEBUG_SWAP BIT(5)
-#define SVM_SEV_FEAT_INT_INJ_MODES \
- (SVM_SEV_FEAT_RESTRICTED_INJECTION | \
- SVM_SEV_FEAT_ALTERNATE_INJECTION)
-
struct vmcb_seg {
u16 selector;
u16 attrib;
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index f7fd4369b821..8707361b24da 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -580,18 +580,22 @@ enum vm_entry_failure_code {
/*
* Exit Qualifications for EPT Violations
*/
-#define EPT_VIOLATION_ACC_READ_BIT 0
-#define EPT_VIOLATION_ACC_WRITE_BIT 1
-#define EPT_VIOLATION_ACC_INSTR_BIT 2
-#define EPT_VIOLATION_RWX_SHIFT 3
-#define EPT_VIOLATION_GVA_IS_VALID_BIT 7
-#define EPT_VIOLATION_GVA_TRANSLATED_BIT 8
-#define EPT_VIOLATION_ACC_READ (1 << EPT_VIOLATION_ACC_READ_BIT)
-#define EPT_VIOLATION_ACC_WRITE (1 << EPT_VIOLATION_ACC_WRITE_BIT)
-#define EPT_VIOLATION_ACC_INSTR (1 << EPT_VIOLATION_ACC_INSTR_BIT)
-#define EPT_VIOLATION_RWX_MASK (VMX_EPT_RWX_MASK << EPT_VIOLATION_RWX_SHIFT)
-#define EPT_VIOLATION_GVA_IS_VALID (1 << EPT_VIOLATION_GVA_IS_VALID_BIT)
-#define EPT_VIOLATION_GVA_TRANSLATED (1 << EPT_VIOLATION_GVA_TRANSLATED_BIT)
+#define EPT_VIOLATION_ACC_READ BIT(0)
+#define EPT_VIOLATION_ACC_WRITE BIT(1)
+#define EPT_VIOLATION_ACC_INSTR BIT(2)
+#define EPT_VIOLATION_PROT_READ BIT(3)
+#define EPT_VIOLATION_PROT_WRITE BIT(4)
+#define EPT_VIOLATION_PROT_EXEC BIT(5)
+#define EPT_VIOLATION_PROT_MASK (EPT_VIOLATION_PROT_READ | \
+ EPT_VIOLATION_PROT_WRITE | \
+ EPT_VIOLATION_PROT_EXEC)
+#define EPT_VIOLATION_GVA_IS_VALID BIT(7)
+#define EPT_VIOLATION_GVA_TRANSLATED BIT(8)
+
+#define EPT_VIOLATION_RWX_TO_PROT(__epte) (((__epte) & VMX_EPT_RWX_MASK) << 3)
+
+static_assert(EPT_VIOLATION_RWX_TO_PROT(VMX_EPT_RWX_MASK) ==
+ (EPT_VIOLATION_PROT_READ | EPT_VIOLATION_PROT_WRITE | EPT_VIOLATION_PROT_EXEC));
/*
* Exit Qualifications for NOTIFY VM EXIT
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 9e75da97bce0..460306b35a4b 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -559,6 +559,9 @@ struct kvm_x86_mce {
#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7)
#define KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA (1 << 8)
+#define KVM_XEN_MSR_MIN_INDEX 0x40000000u
+#define KVM_XEN_MSR_MAX_INDEX 0x4fffffffu
+
struct kvm_xen_hvm_config {
__u32 flags;
__u32 msr;
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index 1814b413fd57..ec1321248dac 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -95,6 +95,7 @@
#define SVM_EXIT_CR14_WRITE_TRAP 0x09e
#define SVM_EXIT_CR15_WRITE_TRAP 0x09f
#define SVM_EXIT_INVPCID 0x0a2
+#define SVM_EXIT_IDLE_HLT 0x0a6
#define SVM_EXIT_NPF 0x400
#define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401
#define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402
@@ -224,6 +225,7 @@
{ SVM_EXIT_CR4_WRITE_TRAP, "write_cr4_trap" }, \
{ SVM_EXIT_CR8_WRITE_TRAP, "write_cr8_trap" }, \
{ SVM_EXIT_INVPCID, "invpcid" }, \
+ { SVM_EXIT_IDLE_HLT, "idle-halt" }, \
{ SVM_EXIT_NPF, "npf" }, \
{ SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \
{ SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }, \
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index c69b1bc45483..138689b8e1d8 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -1074,7 +1074,7 @@ static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t siz
if (ret != UCODE_OK)
return ret;
- for_each_node(nid) {
+ for_each_node_with_cpus(nid) {
cpu = cpumask_first(cpumask_of_node(nid));
c = &cpu_data(cpu);
diff --git a/arch/x86/kernel/cpu/sgx/driver.c b/arch/x86/kernel/cpu/sgx/driver.c
index 22b65a5f5ec6..7f8d1e11dbee 100644
--- a/arch/x86/kernel/cpu/sgx/driver.c
+++ b/arch/x86/kernel/cpu/sgx/driver.c
@@ -150,13 +150,15 @@ int __init sgx_drv_init(void)
u64 xfrm_mask;
int ret;
- if (!cpu_feature_enabled(X86_FEATURE_SGX_LC))
+ if (!cpu_feature_enabled(X86_FEATURE_SGX_LC)) {
+ pr_info("SGX disabled: SGX launch control CPU feature is not available, /dev/sgx_enclave disabled.\n");
return -ENODEV;
+ }
cpuid_count(SGX_CPUID, 0, &eax, &ebx, &ecx, &edx);
if (!(eax & 1)) {
- pr_err("SGX disabled: SGX1 instruction support not available.\n");
+ pr_info("SGX disabled: SGX1 instruction support not available, /dev/sgx_enclave disabled.\n");
return -ENODEV;
}
@@ -173,8 +175,10 @@ int __init sgx_drv_init(void)
}
ret = misc_register(&sgx_dev_enclave);
- if (ret)
+ if (ret) {
+ pr_info("SGX disabled: Unable to register the /dev/sgx_enclave driver (%d).\n", ret);
return ret;
+ }
return 0;
}
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 00189cdeb775..cb3f900c46fc 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -26,6 +26,7 @@
#include <linux/export.h>
#include <linux/clocksource.h>
#include <linux/cpu.h>
+#include <linux/efi.h>
#include <linux/reboot.h>
#include <linux/static_call.h>
#include <asm/div64.h>
@@ -429,6 +430,9 @@ static void __init vmware_platform_setup(void)
pr_warn("Failed to get TSC freq from the hypervisor\n");
}
+ if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && !efi_enabled(EFI_BOOT))
+ x86_init.mpparse.find_mptable = mpparse_find_mptable;
+
vmware_paravirt_ops_setup();
#ifdef CONFIG_X86_IO_APIC
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index ea2c4f21c1ca..fe8ea8c097de 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -22,6 +22,7 @@ config KVM_X86
select KVM_COMMON
select KVM_GENERIC_MMU_NOTIFIER
select KVM_ELIDE_TLB_FLUSH_IF_YOUNG
+ select KVM_MMU_LOCKLESS_AGING
select HAVE_KVM_IRQCHIP
select HAVE_KVM_PFNCACHE
select HAVE_KVM_DIRTY_RING_TSO
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 121edf1f2a79..571c906ffcbf 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -58,25 +58,24 @@ void __init kvm_init_xstate_sizes(void)
u32 xstate_required_size(u64 xstate_bv, bool compacted)
{
- int feature_bit = 0;
u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
+ int i;
xstate_bv &= XFEATURE_MASK_EXTEND;
- while (xstate_bv) {
- if (xstate_bv & 0x1) {
- struct cpuid_xstate_sizes *xs = &xstate_sizes[feature_bit];
- u32 offset;
-
- /* ECX[1]: 64B alignment in compacted form */
- if (compacted)
- offset = (xs->ecx & 0x2) ? ALIGN(ret, 64) : ret;
- else
- offset = xs->ebx;
- ret = max(ret, offset + xs->eax);
- }
+ for (i = XFEATURE_YMM; i < ARRAY_SIZE(xstate_sizes) && xstate_bv; i++) {
+ struct cpuid_xstate_sizes *xs = &xstate_sizes[i];
+ u32 offset;
- xstate_bv >>= 1;
- feature_bit++;
+ if (!(xstate_bv & BIT_ULL(i)))
+ continue;
+
+ /* ECX[1]: 64B alignment in compacted form */
+ if (compacted)
+ offset = (xs->ecx & 0x2) ? ALIGN(ret, 64) : ret;
+ else
+ offset = xs->ebx;
+ ret = max(ret, offset + xs->eax);
+ xstate_bv &= ~BIT_ULL(i);
}
return ret;
@@ -196,6 +195,7 @@ static int kvm_check_cpuid(struct kvm_vcpu *vcpu)
}
static u32 kvm_apply_cpuid_pv_features_quirk(struct kvm_vcpu *vcpu);
+static void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu);
/* Check whether the supplied CPUID data is equal to what is already set for the vCPU. */
static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
@@ -300,10 +300,12 @@ static __always_inline void kvm_update_feature_runtime(struct kvm_vcpu *vcpu,
guest_cpu_cap_change(vcpu, x86_feature, has_feature);
}
-void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
+static void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
+ vcpu->arch.cpuid_dynamic_bits_dirty = false;
+
best = kvm_find_cpuid_entry(vcpu, 1);
if (best) {
kvm_update_feature_runtime(vcpu, best, X86_FEATURE_OSXSAVE,
@@ -333,7 +335,6 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
}
-EXPORT_SYMBOL_GPL(kvm_update_cpuid_runtime);
static bool kvm_cpuid_has_hyperv(struct kvm_vcpu *vcpu)
{
@@ -646,6 +647,9 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
if (cpuid->nent < vcpu->arch.cpuid_nent)
return -E2BIG;
+ if (vcpu->arch.cpuid_dynamic_bits_dirty)
+ kvm_update_cpuid_runtime(vcpu);
+
if (copy_to_user(entries, vcpu->arch.cpuid_entries,
vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
return -EFAULT;
@@ -1423,8 +1427,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
}
break;
case 0xa: { /* Architectural Performance Monitoring */
- union cpuid10_eax eax;
- union cpuid10_edx edx;
+ union cpuid10_eax eax = { };
+ union cpuid10_edx edx = { };
if (!enable_pmu || !static_cpu_has(X86_FEATURE_ARCH_PERFMON)) {
entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
@@ -1440,8 +1444,6 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
if (kvm_pmu_cap.version)
edx.split.anythread_deprecated = 1;
- edx.split.reserved1 = 0;
- edx.split.reserved2 = 0;
entry->eax = eax.full;
entry->ebx = kvm_pmu_cap.events_mask;
@@ -1704,7 +1706,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
phys_as = entry->eax & 0xff;
g_phys_as = phys_as;
if (kvm_mmu_get_max_tdp_level() < 5)
- g_phys_as = min(g_phys_as, 48);
+ g_phys_as = min(g_phys_as, 48U);
}
entry->eax = phys_as | (virt_as << 8) | (g_phys_as << 16);
@@ -1759,7 +1761,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
break;
/* AMD Extended Performance Monitoring and Debug */
case 0x80000022: {
- union cpuid_0x80000022_ebx ebx;
+ union cpuid_0x80000022_ebx ebx = { };
entry->ecx = entry->edx = 0;
if (!enable_pmu || !kvm_cpu_cap_has(X86_FEATURE_PERFMON_V2)) {
@@ -1769,13 +1771,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
cpuid_entry_override(entry, CPUID_8000_0022_EAX);
- if (kvm_cpu_cap_has(X86_FEATURE_PERFMON_V2))
- ebx.split.num_core_pmc = kvm_pmu_cap.num_counters_gp;
- else if (kvm_cpu_cap_has(X86_FEATURE_PERFCTR_CORE))
- ebx.split.num_core_pmc = AMD64_NUM_COUNTERS_CORE;
- else
- ebx.split.num_core_pmc = AMD64_NUM_COUNTERS;
-
+ ebx.split.num_core_pmc = kvm_pmu_cap.num_counters_gp;
entry->ebx = ebx.full;
break;
}
@@ -1985,6 +1981,9 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
struct kvm_cpuid_entry2 *entry;
bool exact, used_max_basic = false;
+ if (vcpu->arch.cpuid_dynamic_bits_dirty)
+ kvm_update_cpuid_runtime(vcpu);
+
entry = kvm_find_cpuid_entry_index(vcpu, function, index);
exact = !!entry;
@@ -2000,12 +1999,29 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
*edx = entry->edx;
if (function == 7 && index == 0) {
u64 data;
- if (!__kvm_get_msr(vcpu, MSR_IA32_TSX_CTRL, &data, true) &&
+ if ((*ebx & (feature_bit(RTM) | feature_bit(HLE))) &&
+ !__kvm_get_msr(vcpu, MSR_IA32_TSX_CTRL, &data, true) &&
(data & TSX_CTRL_CPUID_CLEAR))
*ebx &= ~(feature_bit(RTM) | feature_bit(HLE));
} else if (function == 0x80000007) {
if (kvm_hv_invtsc_suppressed(vcpu))
*edx &= ~feature_bit(CONSTANT_TSC);
+ } else if (IS_ENABLED(CONFIG_KVM_XEN) &&
+ kvm_xen_is_tsc_leaf(vcpu, function)) {
+ /*
+ * Update guest TSC frequency information if necessary.
+ * Ignore failures, there is no sane value that can be
+ * provided if KVM can't get the TSC frequency.
+ */
+ if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu))
+ kvm_guest_time_update(vcpu);
+
+ if (index == 1) {
+ *ecx = vcpu->arch.pvclock_tsc_mul;
+ *edx = vcpu->arch.pvclock_tsc_shift;
+ } else if (index == 2) {
+ *eax = vcpu->arch.hw_tsc_khz;
+ }
}
} else {
*eax = *ebx = *ecx = *edx = 0;
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 67d80aa72d50..d2884162a46a 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -11,7 +11,6 @@ extern u32 kvm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly;
void kvm_set_cpu_caps(void);
void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu);
-void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu);
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry_index(struct kvm_vcpu *vcpu,
u32 function, u32 index);
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
@@ -232,6 +231,14 @@ static __always_inline bool guest_cpu_cap_has(struct kvm_vcpu *vcpu,
{
unsigned int x86_leaf = __feature_leaf(x86_feature);
+ /*
+ * Except for MWAIT, querying dynamic feature bits is disallowed, so
+ * that KVM can defer runtime updates until the next CPUID emulation.
+ */
+ BUILD_BUG_ON(x86_feature == X86_FEATURE_APIC ||
+ x86_feature == X86_FEATURE_OSXSAVE ||
+ x86_feature == X86_FEATURE_OSPKE);
+
return vcpu->arch.cpu_caps[x86_leaf] & __feature_bit(x86_feature);
}
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 60986f67c35a..1349e278cd2a 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -477,8 +477,11 @@ static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
.dst_val = ctxt->dst.val64,
.src_bytes = ctxt->src.bytes,
.dst_bytes = ctxt->dst.bytes,
+ .src_type = ctxt->src.type,
+ .dst_type = ctxt->dst.type,
.ad_bytes = ctxt->ad_bytes,
- .next_rip = ctxt->eip,
+ .rip = ctxt->eip,
+ .next_rip = ctxt->_eip,
};
return ctxt->ops->intercept(ctxt, &info, stage);
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 8dec646e764b..a8fb19940975 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -567,7 +567,7 @@ static void pic_irq_request(struct kvm *kvm, int level)
{
struct kvm_pic *s = kvm->arch.vpic;
- if (!s->output)
+ if (!s->output && level)
s->wakeup_needed = true;
s->output = level;
}
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index 73072585e164..c1df5acfacaf 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -44,7 +44,10 @@ struct x86_instruction_info {
u64 dst_val; /* value of destination operand */
u8 src_bytes; /* size of source operand */
u8 dst_bytes; /* size of destination operand */
+ u8 src_type; /* type of source operand */
+ u8 dst_type; /* type of destination operand */
u8 ad_bytes; /* size of src/dst address */
+ u64 rip; /* rip of the instruction */
u64 next_rip; /* rip following the instruction */
};
@@ -272,8 +275,10 @@ struct operand {
};
};
+#define X86_MAX_INSTRUCTION_LENGTH 15
+
struct fetch_cache {
- u8 data[15];
+ u8 data[X86_MAX_INSTRUCTION_LENGTH];
u8 *ptr;
u8 *end;
};
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index a009c94c26c2..9dbc0f5d9865 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -221,13 +221,6 @@ static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
}
}
-static void kvm_apic_map_free(struct rcu_head *rcu)
-{
- struct kvm_apic_map *map = container_of(rcu, struct kvm_apic_map, rcu);
-
- kvfree(map);
-}
-
static int kvm_recalculate_phys_map(struct kvm_apic_map *new,
struct kvm_vcpu *vcpu,
bool *xapic_id_mismatch)
@@ -489,7 +482,7 @@ out:
mutex_unlock(&kvm->arch.apic_map_lock);
if (old)
- call_rcu(&old->rcu, kvm_apic_map_free);
+ kvfree_rcu(old, rcu);
kvm_make_scan_ioapic_request(kvm);
}
@@ -2593,7 +2586,7 @@ static void __kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value)
vcpu->arch.apic_base = value;
if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE)
- kvm_update_cpuid_runtime(vcpu);
+ vcpu->arch.cpuid_dynamic_bits_dirty = true;
if (!apic)
return;
@@ -3397,9 +3390,9 @@ int kvm_apic_accept_events(struct kvm_vcpu *vcpu)
if (test_and_clear_bit(KVM_APIC_INIT, &apic->pending_events)) {
kvm_vcpu_reset(vcpu, true);
if (kvm_vcpu_is_bsp(apic->vcpu))
- vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+ kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
else
- vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
+ kvm_set_mp_state(vcpu, KVM_MP_STATE_INIT_RECEIVED);
}
if (test_and_clear_bit(KVM_APIC_SIPI, &apic->pending_events)) {
if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
@@ -3408,7 +3401,7 @@ int kvm_apic_accept_events(struct kvm_vcpu *vcpu)
sipi_vector = apic->sipi_vector;
kvm_x86_call(vcpu_deliver_sipi_vector)(vcpu,
sipi_vector);
- vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+ kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
}
}
return 0;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 8160870398b9..63bb77ee1bb1 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -501,7 +501,7 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte)
return false;
}
- if (!spte_has_volatile_bits(old_spte))
+ if (!spte_needs_atomic_update(old_spte))
__update_clear_spte_fast(sptep, new_spte);
else
old_spte = __update_clear_spte_slow(sptep, new_spte);
@@ -524,7 +524,7 @@ static u64 mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep)
int level = sptep_to_sp(sptep)->role.level;
if (!is_shadow_present_pte(old_spte) ||
- !spte_has_volatile_bits(old_spte))
+ !spte_needs_atomic_update(old_spte))
__update_clear_spte_fast(sptep, SHADOW_NONPRESENT_VALUE);
else
old_spte = __update_clear_spte_slow(sptep, SHADOW_NONPRESENT_VALUE);
@@ -853,32 +853,173 @@ static struct kvm_memory_slot *gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu
* About rmap_head encoding:
*
* If the bit zero of rmap_head->val is clear, then it points to the only spte
- * in this rmap chain. Otherwise, (rmap_head->val & ~1) points to a struct
+ * in this rmap chain. Otherwise, (rmap_head->val & ~3) points to a struct
* pte_list_desc containing more mappings.
*/
#define KVM_RMAP_MANY BIT(0)
/*
+ * rmaps and PTE lists are mostly protected by mmu_lock (the shadow MMU always
+ * operates with mmu_lock held for write), but rmaps can be walked without
+ * holding mmu_lock so long as the caller can tolerate SPTEs in the rmap chain
+ * being zapped/dropped _while the rmap is locked_.
+ *
+ * Other than the KVM_RMAP_LOCKED flag, modifications to rmap entries must be
+ * done while holding mmu_lock for write. This allows a task walking rmaps
+ * without holding mmu_lock to concurrently walk the same entries as a task
+ * that is holding mmu_lock but _not_ the rmap lock. Neither task will modify
+ * the rmaps, thus the walks are stable.
+ *
+ * As alluded to above, SPTEs in rmaps are _not_ protected by KVM_RMAP_LOCKED,
+ * only the rmap chains themselves are protected. E.g. holding an rmap's lock
+ * ensures all "struct pte_list_desc" fields are stable.
+ */
+#define KVM_RMAP_LOCKED BIT(1)
+
+static unsigned long __kvm_rmap_lock(struct kvm_rmap_head *rmap_head)
+{
+ unsigned long old_val, new_val;
+
+ lockdep_assert_preemption_disabled();
+
+ /*
+ * Elide the lock if the rmap is empty, as lockless walkers (read-only
+ * mode) don't need to (and can't) walk an empty rmap, nor can they add
+ * entries to the rmap. I.e. the only paths that process empty rmaps
+ * do so while holding mmu_lock for write, and are mutually exclusive.
+ */
+ old_val = atomic_long_read(&rmap_head->val);
+ if (!old_val)
+ return 0;
+
+ do {
+ /*
+ * If the rmap is locked, wait for it to be unlocked before
+ * trying acquire the lock, e.g. to avoid bouncing the cache
+ * line.
+ */
+ while (old_val & KVM_RMAP_LOCKED) {
+ cpu_relax();
+ old_val = atomic_long_read(&rmap_head->val);
+ }
+
+ /*
+ * Recheck for an empty rmap, it may have been purged by the
+ * task that held the lock.
+ */
+ if (!old_val)
+ return 0;
+
+ new_val = old_val | KVM_RMAP_LOCKED;
+ /*
+ * Use try_cmpxchg_acquire() to prevent reads and writes to the rmap
+ * from being reordered outside of the critical section created by
+ * __kvm_rmap_lock().
+ *
+ * Pairs with the atomic_long_set_release() in kvm_rmap_unlock().
+ *
+ * For the !old_val case, no ordering is needed, as there is no rmap
+ * to walk.
+ */
+ } while (!atomic_long_try_cmpxchg_acquire(&rmap_head->val, &old_val, new_val));
+
+ /*
+ * Return the old value, i.e. _without_ the LOCKED bit set. It's
+ * impossible for the return value to be 0 (see above), i.e. the read-
+ * only unlock flow can't get a false positive and fail to unlock.
+ */
+ return old_val;
+}
+
+static unsigned long kvm_rmap_lock(struct kvm *kvm,
+ struct kvm_rmap_head *rmap_head)
+{
+ lockdep_assert_held_write(&kvm->mmu_lock);
+
+ return __kvm_rmap_lock(rmap_head);
+}
+
+static void __kvm_rmap_unlock(struct kvm_rmap_head *rmap_head,
+ unsigned long val)
+{
+ KVM_MMU_WARN_ON(val & KVM_RMAP_LOCKED);
+ /*
+ * Ensure that all accesses to the rmap have completed before unlocking
+ * the rmap.
+ *
+ * Pairs with the atomic_long_try_cmpxchg_acquire() in __kvm_rmap_lock().
+ */
+ atomic_long_set_release(&rmap_head->val, val);
+}
+
+static void kvm_rmap_unlock(struct kvm *kvm,
+ struct kvm_rmap_head *rmap_head,
+ unsigned long new_val)
+{
+ lockdep_assert_held_write(&kvm->mmu_lock);
+
+ __kvm_rmap_unlock(rmap_head, new_val);
+}
+
+static unsigned long kvm_rmap_get(struct kvm_rmap_head *rmap_head)
+{
+ return atomic_long_read(&rmap_head->val) & ~KVM_RMAP_LOCKED;
+}
+
+/*
+ * If mmu_lock isn't held, rmaps can only be locked in read-only mode. The
+ * actual locking is the same, but the caller is disallowed from modifying the
+ * rmap, and so the unlock flow is a nop if the rmap is/was empty.
+ */
+static unsigned long kvm_rmap_lock_readonly(struct kvm_rmap_head *rmap_head)
+{
+ unsigned long rmap_val;
+
+ preempt_disable();
+ rmap_val = __kvm_rmap_lock(rmap_head);
+
+ if (!rmap_val)
+ preempt_enable();
+
+ return rmap_val;
+}
+
+static void kvm_rmap_unlock_readonly(struct kvm_rmap_head *rmap_head,
+ unsigned long old_val)
+{
+ if (!old_val)
+ return;
+
+ KVM_MMU_WARN_ON(old_val != kvm_rmap_get(rmap_head));
+
+ __kvm_rmap_unlock(rmap_head, old_val);
+ preempt_enable();
+}
+
+/*
* Returns the number of pointers in the rmap chain, not counting the new one.
*/
-static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
- struct kvm_rmap_head *rmap_head)
+static int pte_list_add(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+ u64 *spte, struct kvm_rmap_head *rmap_head)
{
+ unsigned long old_val, new_val;
struct pte_list_desc *desc;
int count = 0;
- if (!rmap_head->val) {
- rmap_head->val = (unsigned long)spte;
- } else if (!(rmap_head->val & KVM_RMAP_MANY)) {
+ old_val = kvm_rmap_lock(kvm, rmap_head);
+
+ if (!old_val) {
+ new_val = (unsigned long)spte;
+ } else if (!(old_val & KVM_RMAP_MANY)) {
desc = kvm_mmu_memory_cache_alloc(cache);
- desc->sptes[0] = (u64 *)rmap_head->val;
+ desc->sptes[0] = (u64 *)old_val;
desc->sptes[1] = spte;
desc->spte_count = 2;
desc->tail_count = 0;
- rmap_head->val = (unsigned long)desc | KVM_RMAP_MANY;
+ new_val = (unsigned long)desc | KVM_RMAP_MANY;
++count;
} else {
- desc = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY);
+ desc = (struct pte_list_desc *)(old_val & ~KVM_RMAP_MANY);
count = desc->tail_count + desc->spte_count;
/*
@@ -887,21 +1028,25 @@ static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
*/
if (desc->spte_count == PTE_LIST_EXT) {
desc = kvm_mmu_memory_cache_alloc(cache);
- desc->more = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY);
+ desc->more = (struct pte_list_desc *)(old_val & ~KVM_RMAP_MANY);
desc->spte_count = 0;
desc->tail_count = count;
- rmap_head->val = (unsigned long)desc | KVM_RMAP_MANY;
+ new_val = (unsigned long)desc | KVM_RMAP_MANY;
+ } else {
+ new_val = old_val;
}
desc->sptes[desc->spte_count++] = spte;
}
+
+ kvm_rmap_unlock(kvm, rmap_head, new_val);
+
return count;
}
-static void pte_list_desc_remove_entry(struct kvm *kvm,
- struct kvm_rmap_head *rmap_head,
+static void pte_list_desc_remove_entry(struct kvm *kvm, unsigned long *rmap_val,
struct pte_list_desc *desc, int i)
{
- struct pte_list_desc *head_desc = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY);
+ struct pte_list_desc *head_desc = (struct pte_list_desc *)(*rmap_val & ~KVM_RMAP_MANY);
int j = head_desc->spte_count - 1;
/*
@@ -928,9 +1073,9 @@ static void pte_list_desc_remove_entry(struct kvm *kvm,
* head at the next descriptor, i.e. the new head.
*/
if (!head_desc->more)
- rmap_head->val = 0;
+ *rmap_val = 0;
else
- rmap_head->val = (unsigned long)head_desc->more | KVM_RMAP_MANY;
+ *rmap_val = (unsigned long)head_desc->more | KVM_RMAP_MANY;
mmu_free_pte_list_desc(head_desc);
}
@@ -938,24 +1083,26 @@ static void pte_list_remove(struct kvm *kvm, u64 *spte,
struct kvm_rmap_head *rmap_head)
{
struct pte_list_desc *desc;
+ unsigned long rmap_val;
int i;
- if (KVM_BUG_ON_DATA_CORRUPTION(!rmap_head->val, kvm))
- return;
+ rmap_val = kvm_rmap_lock(kvm, rmap_head);
+ if (KVM_BUG_ON_DATA_CORRUPTION(!rmap_val, kvm))
+ goto out;
- if (!(rmap_head->val & KVM_RMAP_MANY)) {
- if (KVM_BUG_ON_DATA_CORRUPTION((u64 *)rmap_head->val != spte, kvm))
- return;
+ if (!(rmap_val & KVM_RMAP_MANY)) {
+ if (KVM_BUG_ON_DATA_CORRUPTION((u64 *)rmap_val != spte, kvm))
+ goto out;
- rmap_head->val = 0;
+ rmap_val = 0;
} else {
- desc = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY);
+ desc = (struct pte_list_desc *)(rmap_val & ~KVM_RMAP_MANY);
while (desc) {
for (i = 0; i < desc->spte_count; ++i) {
if (desc->sptes[i] == spte) {
- pte_list_desc_remove_entry(kvm, rmap_head,
+ pte_list_desc_remove_entry(kvm, &rmap_val,
desc, i);
- return;
+ goto out;
}
}
desc = desc->more;
@@ -963,6 +1110,9 @@ static void pte_list_remove(struct kvm *kvm, u64 *spte,
KVM_BUG_ON_DATA_CORRUPTION(true, kvm);
}
+
+out:
+ kvm_rmap_unlock(kvm, rmap_head, rmap_val);
}
static void kvm_zap_one_rmap_spte(struct kvm *kvm,
@@ -977,17 +1127,19 @@ static bool kvm_zap_all_rmap_sptes(struct kvm *kvm,
struct kvm_rmap_head *rmap_head)
{
struct pte_list_desc *desc, *next;
+ unsigned long rmap_val;
int i;
- if (!rmap_head->val)
+ rmap_val = kvm_rmap_lock(kvm, rmap_head);
+ if (!rmap_val)
return false;
- if (!(rmap_head->val & KVM_RMAP_MANY)) {
- mmu_spte_clear_track_bits(kvm, (u64 *)rmap_head->val);
+ if (!(rmap_val & KVM_RMAP_MANY)) {
+ mmu_spte_clear_track_bits(kvm, (u64 *)rmap_val);
goto out;
}
- desc = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY);
+ desc = (struct pte_list_desc *)(rmap_val & ~KVM_RMAP_MANY);
for (; desc; desc = next) {
for (i = 0; i < desc->spte_count; i++)
@@ -997,20 +1149,21 @@ static bool kvm_zap_all_rmap_sptes(struct kvm *kvm,
}
out:
/* rmap_head is meaningless now, remember to reset it */
- rmap_head->val = 0;
+ kvm_rmap_unlock(kvm, rmap_head, 0);
return true;
}
unsigned int pte_list_count(struct kvm_rmap_head *rmap_head)
{
+ unsigned long rmap_val = kvm_rmap_get(rmap_head);
struct pte_list_desc *desc;
- if (!rmap_head->val)
+ if (!rmap_val)
return 0;
- else if (!(rmap_head->val & KVM_RMAP_MANY))
+ else if (!(rmap_val & KVM_RMAP_MANY))
return 1;
- desc = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY);
+ desc = (struct pte_list_desc *)(rmap_val & ~KVM_RMAP_MANY);
return desc->tail_count + desc->spte_count;
}
@@ -1053,6 +1206,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
*/
struct rmap_iterator {
/* private fields */
+ struct rmap_head *head;
struct pte_list_desc *desc; /* holds the sptep if not NULL */
int pos; /* index of the sptep */
};
@@ -1067,23 +1221,19 @@ struct rmap_iterator {
static u64 *rmap_get_first(struct kvm_rmap_head *rmap_head,
struct rmap_iterator *iter)
{
- u64 *sptep;
+ unsigned long rmap_val = kvm_rmap_get(rmap_head);
- if (!rmap_head->val)
+ if (!rmap_val)
return NULL;
- if (!(rmap_head->val & KVM_RMAP_MANY)) {
+ if (!(rmap_val & KVM_RMAP_MANY)) {
iter->desc = NULL;
- sptep = (u64 *)rmap_head->val;
- goto out;
+ return (u64 *)rmap_val;
}
- iter->desc = (struct pte_list_desc *)(rmap_head->val & ~KVM_RMAP_MANY);
+ iter->desc = (struct pte_list_desc *)(rmap_val & ~KVM_RMAP_MANY);
iter->pos = 0;
- sptep = iter->desc->sptes[iter->pos];
-out:
- BUG_ON(!is_shadow_present_pte(*sptep));
- return sptep;
+ return iter->desc->sptes[iter->pos];
}
/*
@@ -1093,14 +1243,11 @@ out:
*/
static u64 *rmap_get_next(struct rmap_iterator *iter)
{
- u64 *sptep;
-
if (iter->desc) {
if (iter->pos < PTE_LIST_EXT - 1) {
++iter->pos;
- sptep = iter->desc->sptes[iter->pos];
- if (sptep)
- goto out;
+ if (iter->desc->sptes[iter->pos])
+ return iter->desc->sptes[iter->pos];
}
iter->desc = iter->desc->more;
@@ -1108,20 +1255,24 @@ static u64 *rmap_get_next(struct rmap_iterator *iter)
if (iter->desc) {
iter->pos = 0;
/* desc->sptes[0] cannot be NULL */
- sptep = iter->desc->sptes[iter->pos];
- goto out;
+ return iter->desc->sptes[iter->pos];
}
}
return NULL;
-out:
- BUG_ON(!is_shadow_present_pte(*sptep));
- return sptep;
}
-#define for_each_rmap_spte(_rmap_head_, _iter_, _spte_) \
- for (_spte_ = rmap_get_first(_rmap_head_, _iter_); \
- _spte_; _spte_ = rmap_get_next(_iter_))
+#define __for_each_rmap_spte(_rmap_head_, _iter_, _sptep_) \
+ for (_sptep_ = rmap_get_first(_rmap_head_, _iter_); \
+ _sptep_; _sptep_ = rmap_get_next(_iter_))
+
+#define for_each_rmap_spte(_rmap_head_, _iter_, _sptep_) \
+ __for_each_rmap_spte(_rmap_head_, _iter_, _sptep_) \
+ if (!WARN_ON_ONCE(!is_shadow_present_pte(*(_sptep_)))) \
+
+#define for_each_rmap_spte_lockless(_rmap_head_, _iter_, _sptep_, _spte_) \
+ __for_each_rmap_spte(_rmap_head_, _iter_, _sptep_) \
+ if (is_shadow_present_pte(_spte_ = mmu_spte_get_lockless(sptep)))
static void drop_spte(struct kvm *kvm, u64 *sptep)
{
@@ -1207,12 +1358,13 @@ static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
struct rmap_iterator iter;
bool flush = false;
- for_each_rmap_spte(rmap_head, &iter, sptep)
+ for_each_rmap_spte(rmap_head, &iter, sptep) {
if (spte_ad_need_write_protect(*sptep))
flush |= test_and_clear_bit(PT_WRITABLE_SHIFT,
(unsigned long *)sptep);
else
flush |= spte_clear_dirty(sptep);
+ }
return flush;
}
@@ -1401,7 +1553,7 @@ static void slot_rmap_walk_next(struct slot_rmap_walk_iterator *iterator)
while (++iterator->rmap <= iterator->end_rmap) {
iterator->gfn += KVM_PAGES_PER_HPAGE(iterator->level);
- if (iterator->rmap->val)
+ if (atomic_long_read(&iterator->rmap->val))
return;
}
@@ -1533,7 +1685,7 @@ static void __rmap_add(struct kvm *kvm,
kvm_update_page_stats(kvm, sp->role.level, 1);
rmap_head = gfn_to_rmap(gfn, sp->role.level, slot);
- rmap_count = pte_list_add(cache, spte, rmap_head);
+ rmap_count = pte_list_add(kvm, cache, spte, rmap_head);
if (rmap_count > kvm->stat.max_mmu_rmap_size)
kvm->stat.max_mmu_rmap_size = rmap_count;
@@ -1552,51 +1704,67 @@ static void rmap_add(struct kvm_vcpu *vcpu, const struct kvm_memory_slot *slot,
}
static bool kvm_rmap_age_gfn_range(struct kvm *kvm,
- struct kvm_gfn_range *range, bool test_only)
+ struct kvm_gfn_range *range,
+ bool test_only)
{
- struct slot_rmap_walk_iterator iterator;
+ struct kvm_rmap_head *rmap_head;
struct rmap_iterator iter;
+ unsigned long rmap_val;
bool young = false;
u64 *sptep;
+ gfn_t gfn;
+ int level;
+ u64 spte;
- for_each_slot_rmap_range(range->slot, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL,
- range->start, range->end - 1, &iterator) {
- for_each_rmap_spte(iterator.rmap, &iter, sptep) {
- u64 spte = *sptep;
+ for (level = PG_LEVEL_4K; level <= KVM_MAX_HUGEPAGE_LEVEL; level++) {
+ for (gfn = range->start; gfn < range->end;
+ gfn += KVM_PAGES_PER_HPAGE(level)) {
+ rmap_head = gfn_to_rmap(gfn, level, range->slot);
+ rmap_val = kvm_rmap_lock_readonly(rmap_head);
- if (!is_accessed_spte(spte))
- continue;
+ for_each_rmap_spte_lockless(rmap_head, &iter, sptep, spte) {
+ if (!is_accessed_spte(spte))
+ continue;
- if (test_only)
- return true;
-
- if (spte_ad_enabled(spte)) {
- clear_bit((ffs(shadow_accessed_mask) - 1),
- (unsigned long *)sptep);
- } else {
- /*
- * WARN if mmu_spte_update() signals the need
- * for a TLB flush, as Access tracking a SPTE
- * should never trigger an _immediate_ flush.
- */
- spte = mark_spte_for_access_track(spte);
- WARN_ON_ONCE(mmu_spte_update(sptep, spte));
+ if (test_only) {
+ kvm_rmap_unlock_readonly(rmap_head, rmap_val);
+ return true;
+ }
+
+ if (spte_ad_enabled(spte))
+ clear_bit((ffs(shadow_accessed_mask) - 1),
+ (unsigned long *)sptep);
+ else
+ /*
+ * If the following cmpxchg fails, the
+ * spte is being concurrently modified
+ * and should most likely stay young.
+ */
+ cmpxchg64(sptep, spte,
+ mark_spte_for_access_track(spte));
+ young = true;
}
- young = true;
+
+ kvm_rmap_unlock_readonly(rmap_head, rmap_val);
}
}
return young;
}
+static bool kvm_may_have_shadow_mmu_sptes(struct kvm *kvm)
+{
+ return !tdp_mmu_enabled || READ_ONCE(kvm->arch.indirect_shadow_pages);
+}
+
bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
bool young = false;
- if (kvm_memslots_have_rmaps(kvm))
- young = kvm_rmap_age_gfn_range(kvm, range, false);
-
if (tdp_mmu_enabled)
- young |= kvm_tdp_mmu_age_gfn_range(kvm, range);
+ young = kvm_tdp_mmu_age_gfn_range(kvm, range);
+
+ if (kvm_may_have_shadow_mmu_sptes(kvm))
+ young |= kvm_rmap_age_gfn_range(kvm, range, false);
return young;
}
@@ -1605,11 +1773,14 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
bool young = false;
- if (kvm_memslots_have_rmaps(kvm))
- young = kvm_rmap_age_gfn_range(kvm, range, true);
-
if (tdp_mmu_enabled)
- young |= kvm_tdp_mmu_test_age_gfn(kvm, range);
+ young = kvm_tdp_mmu_test_age_gfn(kvm, range);
+
+ if (young)
+ return young;
+
+ if (kvm_may_have_shadow_mmu_sptes(kvm))
+ young |= kvm_rmap_age_gfn_range(kvm, range, true);
return young;
}
@@ -1656,13 +1827,14 @@ static unsigned kvm_page_table_hashfn(gfn_t gfn)
return hash_64(gfn, KVM_MMU_HASH_SHIFT);
}
-static void mmu_page_add_parent_pte(struct kvm_mmu_memory_cache *cache,
+static void mmu_page_add_parent_pte(struct kvm *kvm,
+ struct kvm_mmu_memory_cache *cache,
struct kvm_mmu_page *sp, u64 *parent_pte)
{
if (!parent_pte)
return;
- pte_list_add(cache, parent_pte, &sp->parent_ptes);
+ pte_list_add(kvm, cache, parent_pte, &sp->parent_ptes);
}
static void mmu_page_remove_parent_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
@@ -2352,7 +2524,7 @@ static void __link_shadow_page(struct kvm *kvm,
mmu_spte_set(sptep, spte);
- mmu_page_add_parent_pte(cache, sp, sptep);
+ mmu_page_add_parent_pte(kvm, cache, sp, sptep);
/*
* The non-direct sub-pagetable must be updated before linking. For
@@ -2416,7 +2588,8 @@ static int mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
* avoids retaining a large number of stale nested SPs.
*/
if (tdp_enabled && invalid_list &&
- child->role.guest_mode && !child->parent_ptes.val)
+ child->role.guest_mode &&
+ !atomic_long_read(&child->parent_ptes.val))
return kvm_mmu_prepare_zap_page(kvm, child,
invalid_list);
}
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index f4711674c47b..68e323568e95 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -510,8 +510,7 @@ error:
* Note, pte_access holds the raw RWX bits from the EPTE, not
* ACC_*_MASK flags!
*/
- walker->fault.exit_qualification |= (pte_access & VMX_EPT_RWX_MASK) <<
- EPT_VIOLATION_RWX_SHIFT;
+ walker->fault.exit_qualification |= EPT_VIOLATION_RWX_TO_PROT(pte_access);
}
#endif
walker->fault.address = addr;
diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
index 22551e2f1d00..0f9f47b4ab0e 100644
--- a/arch/x86/kvm/mmu/spte.c
+++ b/arch/x86/kvm/mmu/spte.c
@@ -129,25 +129,32 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
}
/*
- * Returns true if the SPTE has bits that may be set without holding mmu_lock.
- * The caller is responsible for checking if the SPTE is shadow-present, and
- * for determining whether or not the caller cares about non-leaf SPTEs.
+ * Returns true if the SPTE needs to be updated atomically due to having bits
+ * that may be changed without holding mmu_lock, and for which KVM must not
+ * lose information. E.g. KVM must not drop Dirty bit information. The caller
+ * is responsible for checking if the SPTE is shadow-present, and for
+ * determining whether or not the caller cares about non-leaf SPTEs.
*/
-bool spte_has_volatile_bits(u64 spte)
+bool spte_needs_atomic_update(u64 spte)
{
+ /* SPTEs can be made Writable bit by KVM's fast page fault handler. */
if (!is_writable_pte(spte) && is_mmu_writable_spte(spte))
return true;
- if (is_access_track_spte(spte))
+ /*
+ * A/D-disabled SPTEs can be access-tracked by aging, and access-tracked
+ * SPTEs can be restored by KVM's fast page fault handler.
+ */
+ if (!spte_ad_enabled(spte))
return true;
- if (spte_ad_enabled(spte)) {
- if (!(spte & shadow_accessed_mask) ||
- (is_writable_pte(spte) && !(spte & shadow_dirty_mask)))
- return true;
- }
-
- return false;
+ /*
+ * Dirty and Accessed bits can be set by the CPU. Ignore the Accessed
+ * bit, as KVM tolerates false negatives/positives, e.g. KVM doesn't
+ * invalidate TLBs when aging SPTEs, and so it's safe to clobber the
+ * Accessed bit (and rare in practice).
+ */
+ return is_writable_pte(spte) && !(spte & shadow_dirty_mask);
}
bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 59746854c0af..79cdceba9857 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -519,7 +519,7 @@ static inline u64 get_mmio_spte_generation(u64 spte)
return gen;
}
-bool spte_has_volatile_bits(u64 spte);
+bool spte_needs_atomic_update(u64 spte);
bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
const struct kvm_memory_slot *slot,
diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h
index 047b78333653..364c5da6c499 100644
--- a/arch/x86/kvm/mmu/tdp_iter.h
+++ b/arch/x86/kvm/mmu/tdp_iter.h
@@ -25,6 +25,13 @@ static inline u64 kvm_tdp_mmu_write_spte_atomic(tdp_ptep_t sptep, u64 new_spte)
return xchg(rcu_dereference(sptep), new_spte);
}
+static inline u64 tdp_mmu_clear_spte_bits_atomic(tdp_ptep_t sptep, u64 mask)
+{
+ atomic64_t *sptep_atomic = (atomic64_t *)rcu_dereference(sptep);
+
+ return (u64)atomic64_fetch_and(~mask, sptep_atomic);
+}
+
static inline void __kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 new_spte)
{
KVM_MMU_WARN_ON(is_ept_ve_possible(new_spte));
@@ -32,28 +39,21 @@ static inline void __kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 new_spte)
}
/*
- * SPTEs must be modified atomically if they are shadow-present, leaf
- * SPTEs, and have volatile bits, i.e. has bits that can be set outside
- * of mmu_lock. The Writable bit can be set by KVM's fast page fault
- * handler, and Accessed and Dirty bits can be set by the CPU.
- *
- * Note, non-leaf SPTEs do have Accessed bits and those bits are
- * technically volatile, but KVM doesn't consume the Accessed bit of
- * non-leaf SPTEs, i.e. KVM doesn't care if it clobbers the bit. This
- * logic needs to be reassessed if KVM were to use non-leaf Accessed
- * bits, e.g. to skip stepping down into child SPTEs when aging SPTEs.
+ * SPTEs must be modified atomically if they are shadow-present, leaf SPTEs,
+ * and have volatile bits (bits that can be set outside of mmu_lock) that
+ * must not be clobbered.
*/
-static inline bool kvm_tdp_mmu_spte_need_atomic_write(u64 old_spte, int level)
+static inline bool kvm_tdp_mmu_spte_need_atomic_update(u64 old_spte, int level)
{
return is_shadow_present_pte(old_spte) &&
is_last_spte(old_spte, level) &&
- spte_has_volatile_bits(old_spte);
+ spte_needs_atomic_update(old_spte);
}
static inline u64 kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 old_spte,
u64 new_spte, int level)
{
- if (kvm_tdp_mmu_spte_need_atomic_write(old_spte, level))
+ if (kvm_tdp_mmu_spte_need_atomic_update(old_spte, level))
return kvm_tdp_mmu_write_spte_atomic(sptep, new_spte);
__kvm_tdp_mmu_write_spte(sptep, new_spte);
@@ -63,12 +63,8 @@ static inline u64 kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 old_spte,
static inline u64 tdp_mmu_clear_spte_bits(tdp_ptep_t sptep, u64 old_spte,
u64 mask, int level)
{
- atomic64_t *sptep_atomic;
-
- if (kvm_tdp_mmu_spte_need_atomic_write(old_spte, level)) {
- sptep_atomic = (atomic64_t *)rcu_dereference(sptep);
- return (u64)atomic64_fetch_and(~mask, sptep_atomic);
- }
+ if (kvm_tdp_mmu_spte_need_atomic_update(old_spte, level))
+ return tdp_mmu_clear_spte_bits_atomic(sptep, mask);
__kvm_tdp_mmu_write_spte(sptep, old_spte & ~mask);
return old_spte;
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 046b6ba31197..21a3b8166242 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -40,7 +40,9 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
kvm_tdp_mmu_invalidate_roots(kvm, KVM_VALID_ROOTS);
kvm_tdp_mmu_zap_invalidated_roots(kvm, false);
- WARN_ON(atomic64_read(&kvm->arch.tdp_mmu_pages));
+#ifdef CONFIG_KVM_PROVE_MMU
+ KVM_MMU_WARN_ON(atomic64_read(&kvm->arch.tdp_mmu_pages));
+#endif
WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots));
/*
@@ -193,6 +195,19 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
!tdp_mmu_root_match((_root), (_types)))) { \
} else
+/*
+ * Iterate over all TDP MMU roots in an RCU read-side critical section.
+ * It is safe to iterate over the SPTEs under the root, but their values will
+ * be unstable, so all writes must be atomic. As this routine is meant to be
+ * used without holding the mmu_lock at all, any bits that are flipped must
+ * be reflected in kvm_tdp_mmu_spte_need_atomic_write().
+ */
+#define for_each_tdp_mmu_root_rcu(_kvm, _root, _as_id, _types) \
+ list_for_each_entry_rcu(_root, &_kvm->arch.tdp_mmu_roots, link) \
+ if ((_as_id >= 0 && kvm_mmu_page_as_id(_root) != _as_id) || \
+ !tdp_mmu_root_match((_root), (_types))) { \
+ } else
+
#define for_each_valid_tdp_mmu_root(_kvm, _root, _as_id) \
__for_each_tdp_mmu_root(_kvm, _root, _as_id, KVM_VALID_ROOTS)
@@ -312,13 +327,17 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
static void tdp_account_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
kvm_account_pgtable_pages((void *)sp->spt, +1);
+#ifdef CONFIG_KVM_PROVE_MMU
atomic64_inc(&kvm->arch.tdp_mmu_pages);
+#endif
}
static void tdp_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
kvm_account_pgtable_pages((void *)sp->spt, -1);
+#ifdef CONFIG_KVM_PROVE_MMU
atomic64_dec(&kvm->arch.tdp_mmu_pages);
+#endif
}
/**
@@ -774,9 +793,6 @@ static inline void tdp_mmu_iter_set_spte(struct kvm *kvm, struct tdp_iter *iter,
continue; \
else
-#define tdp_mmu_for_each_pte(_iter, _kvm, _root, _start, _end) \
- for_each_tdp_pte(_iter, _kvm, _root, _start, _end)
-
static inline bool __must_check tdp_mmu_iter_need_resched(struct kvm *kvm,
struct tdp_iter *iter)
{
@@ -1235,7 +1251,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
rcu_read_lock();
- tdp_mmu_for_each_pte(iter, kvm, root, fault->gfn, fault->gfn + 1) {
+ for_each_tdp_pte(iter, kvm, root, fault->gfn, fault->gfn + 1) {
int r;
if (fault->nx_huge_page_workaround_enabled)
@@ -1332,21 +1348,22 @@ bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range,
* from the clear_young() or clear_flush_young() notifier, which uses the
* return value to determine if the page has been accessed.
*/
-static void kvm_tdp_mmu_age_spte(struct tdp_iter *iter)
+static void kvm_tdp_mmu_age_spte(struct kvm *kvm, struct tdp_iter *iter)
{
u64 new_spte;
if (spte_ad_enabled(iter->old_spte)) {
- iter->old_spte = tdp_mmu_clear_spte_bits(iter->sptep,
- iter->old_spte,
- shadow_accessed_mask,
- iter->level);
+ iter->old_spte = tdp_mmu_clear_spte_bits_atomic(iter->sptep,
+ shadow_accessed_mask);
new_spte = iter->old_spte & ~shadow_accessed_mask;
} else {
new_spte = mark_spte_for_access_track(iter->old_spte);
- iter->old_spte = kvm_tdp_mmu_write_spte(iter->sptep,
- iter->old_spte, new_spte,
- iter->level);
+ /*
+ * It is safe for the following cmpxchg to fail. Leave the
+ * Accessed bit set, as the spte is most likely young anyway.
+ */
+ if (__tdp_mmu_set_spte_atomic(kvm, iter, new_spte))
+ return;
}
trace_kvm_tdp_mmu_spte_changed(iter->as_id, iter->gfn, iter->level,
@@ -1371,9 +1388,9 @@ static bool __kvm_tdp_mmu_age_gfn_range(struct kvm *kvm,
* valid roots!
*/
WARN_ON(types & ~KVM_VALID_ROOTS);
- __for_each_tdp_mmu_root(kvm, root, range->slot->as_id, types) {
- guard(rcu)();
+ guard(rcu)();
+ for_each_tdp_mmu_root_rcu(kvm, root, range->slot->as_id, types) {
tdp_root_for_each_leaf_pte(iter, kvm, root, range->start, range->end) {
if (!is_accessed_spte(iter.old_spte))
continue;
@@ -1382,7 +1399,7 @@ static bool __kvm_tdp_mmu_age_gfn_range(struct kvm *kvm,
return true;
ret = true;
- kvm_tdp_mmu_age_spte(&iter);
+ kvm_tdp_mmu_age_spte(kvm, &iter);
}
}
@@ -1904,7 +1921,7 @@ int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
*root_level = vcpu->arch.mmu->root_role.level;
- tdp_mmu_for_each_pte(iter, vcpu->kvm, root, gfn, gfn + 1) {
+ for_each_tdp_pte(iter, vcpu->kvm, root, gfn, gfn + 1) {
leaf = iter.level;
sptes[leaf] = iter.old_spte;
}
@@ -1931,7 +1948,7 @@ u64 *kvm_tdp_mmu_fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, gfn_t gfn,
struct tdp_iter iter;
tdp_ptep_t sptep = NULL;
- tdp_mmu_for_each_pte(iter, vcpu->kvm, root, gfn, gfn + 1) {
+ for_each_tdp_pte(iter, vcpu->kvm, root, gfn, gfn + 1) {
*spte = iter.old_spte;
sptep = iter.sptep;
}
diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
index e0ab7df27b66..699e551ec93b 100644
--- a/arch/x86/kvm/smm.c
+++ b/arch/x86/kvm/smm.c
@@ -358,7 +358,7 @@ void enter_smm(struct kvm_vcpu *vcpu)
goto error;
#endif
- kvm_update_cpuid_runtime(vcpu);
+ vcpu->arch.cpuid_dynamic_bits_dirty = true;
kvm_mmu_reset_context(vcpu);
return;
error:
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 04c375bf1ac2..834b67672d50 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -994,7 +994,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
/* in case we halted in L2 */
- svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE;
+ kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
/* Give the current vmcb to the guest */
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 661108d65ee7..0bc708ee2788 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -140,7 +140,7 @@ static inline bool is_mirroring_enc_context(struct kvm *kvm)
static bool sev_vcpu_has_debug_swap(struct vcpu_svm *svm)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
- struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(vcpu->kvm);
return sev->vmsa_features & SVM_SEV_FEAT_DEBUG_SWAP;
}
@@ -226,9 +226,7 @@ e_uncharge:
static unsigned int sev_get_asid(struct kvm *kvm)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
-
- return sev->asid;
+ return to_kvm_sev_info(kvm)->asid;
}
static void sev_asid_free(struct kvm_sev_info *sev)
@@ -403,7 +401,7 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
struct kvm_sev_init *data,
unsigned long vm_type)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
struct sev_platform_init_args init_args = {0};
bool es_active = vm_type != KVM_X86_SEV_VM;
u64 valid_vmsa_features = es_active ? sev_supported_vmsa_features : 0;
@@ -500,10 +498,9 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
static int sev_guest_init2(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct kvm_sev_init data;
- if (!sev->need_init)
+ if (!to_kvm_sev_info(kvm)->need_init)
return -EINVAL;
if (kvm->arch.vm_type != KVM_X86_SEV_VM &&
@@ -543,14 +540,14 @@ static int __sev_issue_cmd(int fd, int id, void *data, int *error)
static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
return __sev_issue_cmd(sev->fd, id, data, error);
}
static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
struct sev_data_launch_start start;
struct kvm_sev_launch_start params;
void *dh_blob, *session_blob;
@@ -622,9 +619,9 @@ e_free_dh:
static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
unsigned long ulen, unsigned long *n,
- int write)
+ unsigned int flags)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
unsigned long npages, size;
int npinned;
unsigned long locked, lock_limit;
@@ -663,7 +660,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
return ERR_PTR(-ENOMEM);
/* Pin the user virtual address. */
- npinned = pin_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
+ npinned = pin_user_pages_fast(uaddr, npages, flags, pages);
if (npinned != npages) {
pr_err("SEV: Failure locking %lu pages.\n", npages);
ret = -ENOMEM;
@@ -686,11 +683,9 @@ err:
static void sev_unpin_memory(struct kvm *kvm, struct page **pages,
unsigned long npages)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
-
unpin_user_pages(pages, npages);
kvfree(pages);
- sev->pages_locked -= npages;
+ to_kvm_sev_info(kvm)->pages_locked -= npages;
}
static void sev_clflush_pages(struct page *pages[], unsigned long npages)
@@ -734,7 +729,6 @@ static unsigned long get_num_contig_pages(unsigned long idx,
static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
unsigned long vaddr, vaddr_end, next_vaddr, npages, pages, size, i;
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct kvm_sev_launch_update_data params;
struct sev_data_launch_update_data data;
struct page **inpages;
@@ -751,7 +745,7 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
vaddr_end = vaddr + size;
/* Lock the user memory. */
- inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);
+ inpages = sev_pin_memory(kvm, vaddr, size, &npages, FOLL_WRITE);
if (IS_ERR(inpages))
return PTR_ERR(inpages);
@@ -762,7 +756,7 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
sev_clflush_pages(inpages, npages);
data.reserved = 0;
- data.handle = sev->handle;
+ data.handle = to_kvm_sev_info(kvm)->handle;
for (i = 0; vaddr < vaddr_end; vaddr = next_vaddr, i += pages) {
int offset, len;
@@ -802,7 +796,7 @@ e_unpin:
static int sev_es_sync_vmsa(struct vcpu_svm *svm)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
- struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(vcpu->kvm);
struct sev_es_save_area *save = svm->sev_es.vmsa;
struct xregs_state *xsave;
const u8 *s;
@@ -972,7 +966,6 @@ static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
void __user *measure = u64_to_user_ptr(argp->data);
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_launch_measure data;
struct kvm_sev_launch_measure params;
void __user *p = NULL;
@@ -1005,7 +998,7 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
}
cmd:
- data.handle = sev->handle;
+ data.handle = to_kvm_sev_info(kvm)->handle;
ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_MEASURE, &data, &argp->error);
/*
@@ -1033,19 +1026,17 @@ e_free_blob:
static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_launch_finish data;
if (!sev_guest(kvm))
return -ENOTTY;
- data.handle = sev->handle;
+ data.handle = to_kvm_sev_info(kvm)->handle;
return sev_issue_cmd(kvm, SEV_CMD_LAUNCH_FINISH, &data, &argp->error);
}
static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct kvm_sev_guest_status params;
struct sev_data_guest_status data;
int ret;
@@ -1055,7 +1046,7 @@ static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp)
memset(&data, 0, sizeof(data));
- data.handle = sev->handle;
+ data.handle = to_kvm_sev_info(kvm)->handle;
ret = sev_issue_cmd(kvm, SEV_CMD_GUEST_STATUS, &data, &argp->error);
if (ret)
return ret;
@@ -1074,11 +1065,10 @@ static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src,
unsigned long dst, int size,
int *error, bool enc)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_dbg data;
data.reserved = 0;
- data.handle = sev->handle;
+ data.handle = to_kvm_sev_info(kvm)->handle;
data.dst_addr = dst;
data.src_addr = src;
data.len = size;
@@ -1250,7 +1240,7 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
if (IS_ERR(src_p))
return PTR_ERR(src_p);
- dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, 1);
+ dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, FOLL_WRITE);
if (IS_ERR(dst_p)) {
sev_unpin_memory(kvm, src_p, n);
return PTR_ERR(dst_p);
@@ -1302,7 +1292,6 @@ err:
static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_launch_secret data;
struct kvm_sev_launch_secret params;
struct page **pages;
@@ -1316,7 +1305,7 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (copy_from_user(&params, u64_to_user_ptr(argp->data), sizeof(params)))
return -EFAULT;
- pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1);
+ pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, FOLL_WRITE);
if (IS_ERR(pages))
return PTR_ERR(pages);
@@ -1358,7 +1347,7 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
data.hdr_address = __psp_pa(hdr);
data.hdr_len = params.hdr_len;
- data.handle = sev->handle;
+ data.handle = to_kvm_sev_info(kvm)->handle;
ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, &data, &argp->error);
kfree(hdr);
@@ -1378,7 +1367,6 @@ e_unpin_memory:
static int sev_get_attestation_report(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
void __user *report = u64_to_user_ptr(argp->data);
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_attestation_report data;
struct kvm_sev_attestation_report params;
void __user *p;
@@ -1411,7 +1399,7 @@ static int sev_get_attestation_report(struct kvm *kvm, struct kvm_sev_cmd *argp)
memcpy(data.mnonce, params.mnonce, sizeof(params.mnonce));
}
cmd:
- data.handle = sev->handle;
+ data.handle = to_kvm_sev_info(kvm)->handle;
ret = sev_issue_cmd(kvm, SEV_CMD_ATTESTATION_REPORT, &data, &argp->error);
/*
* If we query the session length, FW responded with expected data.
@@ -1441,12 +1429,11 @@ static int
__sev_send_start_query_session_length(struct kvm *kvm, struct kvm_sev_cmd *argp,
struct kvm_sev_send_start *params)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_send_start data;
int ret;
memset(&data, 0, sizeof(data));
- data.handle = sev->handle;
+ data.handle = to_kvm_sev_info(kvm)->handle;
ret = sev_issue_cmd(kvm, SEV_CMD_SEND_START, &data, &argp->error);
params->session_len = data.session_len;
@@ -1459,7 +1446,6 @@ __sev_send_start_query_session_length(struct kvm *kvm, struct kvm_sev_cmd *argp,
static int sev_send_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_send_start data;
struct kvm_sev_send_start params;
void *amd_certs, *session_data;
@@ -1520,7 +1506,7 @@ static int sev_send_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
data.amd_certs_len = params.amd_certs_len;
data.session_address = __psp_pa(session_data);
data.session_len = params.session_len;
- data.handle = sev->handle;
+ data.handle = to_kvm_sev_info(kvm)->handle;
ret = sev_issue_cmd(kvm, SEV_CMD_SEND_START, &data, &argp->error);
@@ -1552,12 +1538,11 @@ static int
__sev_send_update_data_query_lengths(struct kvm *kvm, struct kvm_sev_cmd *argp,
struct kvm_sev_send_update_data *params)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_send_update_data data;
int ret;
memset(&data, 0, sizeof(data));
- data.handle = sev->handle;
+ data.handle = to_kvm_sev_info(kvm)->handle;
ret = sev_issue_cmd(kvm, SEV_CMD_SEND_UPDATE_DATA, &data, &argp->error);
params->hdr_len = data.hdr_len;
@@ -1572,7 +1557,6 @@ __sev_send_update_data_query_lengths(struct kvm *kvm, struct kvm_sev_cmd *argp,
static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_send_update_data data;
struct kvm_sev_send_update_data params;
void *hdr, *trans_data;
@@ -1626,7 +1610,7 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset;
data.guest_address |= sev_me_mask;
data.guest_len = params.guest_len;
- data.handle = sev->handle;
+ data.handle = to_kvm_sev_info(kvm)->handle;
ret = sev_issue_cmd(kvm, SEV_CMD_SEND_UPDATE_DATA, &data, &argp->error);
@@ -1657,31 +1641,29 @@ e_unpin:
static int sev_send_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_send_finish data;
if (!sev_guest(kvm))
return -ENOTTY;
- data.handle = sev->handle;
+ data.handle = to_kvm_sev_info(kvm)->handle;
return sev_issue_cmd(kvm, SEV_CMD_SEND_FINISH, &data, &argp->error);
}
static int sev_send_cancel(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_send_cancel data;
if (!sev_guest(kvm))
return -ENOTTY;
- data.handle = sev->handle;
+ data.handle = to_kvm_sev_info(kvm)->handle;
return sev_issue_cmd(kvm, SEV_CMD_SEND_CANCEL, &data, &argp->error);
}
static int sev_receive_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
struct sev_data_receive_start start;
struct kvm_sev_receive_start params;
int *error = &argp->error;
@@ -1755,7 +1737,6 @@ e_free_pdh:
static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct kvm_sev_receive_update_data params;
struct sev_data_receive_update_data data;
void *hdr = NULL, *trans = NULL;
@@ -1798,7 +1779,7 @@ static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
/* Pin guest memory */
guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
- PAGE_SIZE, &n, 1);
+ PAGE_SIZE, &n, FOLL_WRITE);
if (IS_ERR(guest_page)) {
ret = PTR_ERR(guest_page);
goto e_free_trans;
@@ -1815,7 +1796,7 @@ static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset;
data.guest_address |= sev_me_mask;
data.guest_len = params.guest_len;
- data.handle = sev->handle;
+ data.handle = to_kvm_sev_info(kvm)->handle;
ret = sev_issue_cmd(kvm, SEV_CMD_RECEIVE_UPDATE_DATA, &data,
&argp->error);
@@ -1832,13 +1813,12 @@ e_free_hdr:
static int sev_receive_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_receive_finish data;
if (!sev_guest(kvm))
return -ENOTTY;
- data.handle = sev->handle;
+ data.handle = to_kvm_sev_info(kvm)->handle;
return sev_issue_cmd(kvm, SEV_CMD_RECEIVE_FINISH, &data, &argp->error);
}
@@ -1858,8 +1838,8 @@ static bool is_cmd_allowed_from_mirror(u32 cmd_id)
static int sev_lock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
{
- struct kvm_sev_info *dst_sev = &to_kvm_svm(dst_kvm)->sev_info;
- struct kvm_sev_info *src_sev = &to_kvm_svm(src_kvm)->sev_info;
+ struct kvm_sev_info *dst_sev = to_kvm_sev_info(dst_kvm);
+ struct kvm_sev_info *src_sev = to_kvm_sev_info(src_kvm);
int r = -EBUSY;
if (dst_kvm == src_kvm)
@@ -1893,8 +1873,8 @@ release_dst:
static void sev_unlock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
{
- struct kvm_sev_info *dst_sev = &to_kvm_svm(dst_kvm)->sev_info;
- struct kvm_sev_info *src_sev = &to_kvm_svm(src_kvm)->sev_info;
+ struct kvm_sev_info *dst_sev = to_kvm_sev_info(dst_kvm);
+ struct kvm_sev_info *src_sev = to_kvm_sev_info(src_kvm);
mutex_unlock(&dst_kvm->lock);
mutex_unlock(&src_kvm->lock);
@@ -1968,8 +1948,8 @@ static void sev_unlock_vcpus_for_migration(struct kvm *kvm)
static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm)
{
- struct kvm_sev_info *dst = &to_kvm_svm(dst_kvm)->sev_info;
- struct kvm_sev_info *src = &to_kvm_svm(src_kvm)->sev_info;
+ struct kvm_sev_info *dst = to_kvm_sev_info(dst_kvm);
+ struct kvm_sev_info *src = to_kvm_sev_info(src_kvm);
struct kvm_vcpu *dst_vcpu, *src_vcpu;
struct vcpu_svm *dst_svm, *src_svm;
struct kvm_sev_info *mirror;
@@ -2009,8 +1989,7 @@ static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm)
* and add the new mirror to the list.
*/
if (is_mirroring_enc_context(dst_kvm)) {
- struct kvm_sev_info *owner_sev_info =
- &to_kvm_svm(dst->enc_context_owner)->sev_info;
+ struct kvm_sev_info *owner_sev_info = to_kvm_sev_info(dst->enc_context_owner);
list_del(&src->mirror_entry);
list_add_tail(&dst->mirror_entry, &owner_sev_info->mirror_vms);
@@ -2069,7 +2048,7 @@ static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src)
int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
{
- struct kvm_sev_info *dst_sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *dst_sev = to_kvm_sev_info(kvm);
struct kvm_sev_info *src_sev, *cg_cleanup_sev;
CLASS(fd, f)(source_fd);
struct kvm *source_kvm;
@@ -2093,7 +2072,7 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
goto out_unlock;
}
- src_sev = &to_kvm_svm(source_kvm)->sev_info;
+ src_sev = to_kvm_sev_info(source_kvm);
dst_sev->misc_cg = get_current_misc_cg();
cg_cleanup_sev = dst_sev;
@@ -2181,7 +2160,7 @@ static void *snp_context_create(struct kvm *kvm, struct kvm_sev_cmd *argp)
static int snp_bind_asid(struct kvm *kvm, int *error)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
struct sev_data_snp_activate data = {0};
data.gctx_paddr = __psp_pa(sev->snp_context);
@@ -2191,7 +2170,7 @@ static int snp_bind_asid(struct kvm *kvm, int *error)
static int snp_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
struct sev_data_snp_launch_start start = {0};
struct kvm_sev_snp_launch_start params;
int rc;
@@ -2260,7 +2239,7 @@ static int sev_gmem_post_populate(struct kvm *kvm, gfn_t gfn_start, kvm_pfn_t pf
void __user *src, int order, void *opaque)
{
struct sev_gmem_populate_args *sev_populate_args = opaque;
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
int n_private = 0, ret, i;
int npages = (1 << order);
gfn_t gfn;
@@ -2350,7 +2329,7 @@ err:
static int snp_launch_update(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
struct sev_gmem_populate_args sev_populate_args = {0};
struct kvm_sev_snp_launch_update params;
struct kvm_memory_slot *memslot;
@@ -2434,7 +2413,7 @@ out:
static int snp_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
struct sev_data_snp_launch_update data = {};
struct kvm_vcpu *vcpu;
unsigned long i;
@@ -2482,7 +2461,7 @@ static int snp_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
static int snp_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
struct kvm_sev_snp_launch_finish params;
struct sev_data_snp_launch_finish *data;
void *id_block = NULL, *id_auth = NULL;
@@ -2677,7 +2656,7 @@ out:
int sev_mem_enc_register_region(struct kvm *kvm,
struct kvm_enc_region *range)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
struct enc_region *region;
int ret = 0;
@@ -2696,7 +2675,8 @@ int sev_mem_enc_register_region(struct kvm *kvm,
return -ENOMEM;
mutex_lock(&kvm->lock);
- region->pages = sev_pin_memory(kvm, range->addr, range->size, &region->npages, 1);
+ region->pages = sev_pin_memory(kvm, range->addr, range->size, &region->npages,
+ FOLL_WRITE | FOLL_LONGTERM);
if (IS_ERR(region->pages)) {
ret = PTR_ERR(region->pages);
mutex_unlock(&kvm->lock);
@@ -2729,7 +2709,7 @@ e_free:
static struct enc_region *
find_enc_region(struct kvm *kvm, struct kvm_enc_region *range)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
struct list_head *head = &sev->regions_list;
struct enc_region *i;
@@ -2824,9 +2804,9 @@ int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd)
* The mirror kvm holds an enc_context_owner ref so its asid can't
* disappear until we're done with it
*/
- source_sev = &to_kvm_svm(source_kvm)->sev_info;
+ source_sev = to_kvm_sev_info(source_kvm);
kvm_get_kvm(source_kvm);
- mirror_sev = &to_kvm_svm(kvm)->sev_info;
+ mirror_sev = to_kvm_sev_info(kvm);
list_add_tail(&mirror_sev->mirror_entry, &source_sev->mirror_vms);
/* Set enc_context_owner and copy its encryption context over */
@@ -2854,7 +2834,7 @@ e_unlock:
static int snp_decommission_context(struct kvm *kvm)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
struct sev_data_snp_addr data = {};
int ret;
@@ -2879,7 +2859,7 @@ static int snp_decommission_context(struct kvm *kvm)
void sev_vm_destroy(struct kvm *kvm)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
struct list_head *head = &sev->regions_list;
struct list_head *pos, *q;
@@ -3271,7 +3251,7 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
if (kvm_ghcb_xcr0_is_valid(svm)) {
vcpu->arch.xcr0 = ghcb_get_xcr0(ghcb);
- kvm_update_cpuid_runtime(vcpu);
+ vcpu->arch.cpuid_dynamic_bits_dirty = true;
}
/* Copy the GHCB exit information into the VMCB fields */
@@ -3430,8 +3410,7 @@ vmgexit_err:
dump_ghcb(svm);
}
- ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 2);
- ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, reason);
+ svm_vmgexit_bad_input(svm, reason);
/* Resume the guest to "return" the error code. */
return 1;
@@ -3472,10 +3451,19 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm)
svm->sev_es.ghcb = NULL;
}
-void pre_sev_run(struct vcpu_svm *svm, int cpu)
+int pre_sev_run(struct vcpu_svm *svm, int cpu)
{
struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
- unsigned int asid = sev_get_asid(svm->vcpu.kvm);
+ struct kvm *kvm = svm->vcpu.kvm;
+ unsigned int asid = sev_get_asid(kvm);
+
+ /*
+ * Reject KVM_RUN if userspace attempts to run the vCPU with an invalid
+ * VMSA, e.g. if userspace forces the vCPU to be RUNNABLE after an SNP
+ * AP Destroy event.
+ */
+ if (sev_es_guest(kvm) && !VALID_PAGE(svm->vmcb->control.vmsa_pa))
+ return -EINVAL;
/* Assign the asid allocated with this SEV guest */
svm->asid = asid;
@@ -3488,11 +3476,12 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
*/
if (sd->sev_vmcbs[asid] == svm->vmcb &&
svm->vcpu.arch.last_vmentry_cpu == cpu)
- return;
+ return 0;
sd->sev_vmcbs[asid] = svm->vmcb;
svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
+ return 0;
}
#define GHCB_SCRATCH_AREA_LIMIT (16ULL * PAGE_SIZE)
@@ -3574,8 +3563,7 @@ static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
return 0;
e_scratch:
- ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 2);
- ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, GHCB_ERR_INVALID_SCRATCH_AREA);
+ svm_vmgexit_bad_input(svm, GHCB_ERR_INVALID_SCRATCH_AREA);
return 1;
}
@@ -3675,7 +3663,14 @@ static void snp_complete_psc(struct vcpu_svm *svm, u64 psc_ret)
svm->sev_es.psc_inflight = 0;
svm->sev_es.psc_idx = 0;
svm->sev_es.psc_2m = false;
- ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, psc_ret);
+
+ /*
+ * PSC requests always get a "no action" response in SW_EXITINFO1, with
+ * a PSC-specific return code in SW_EXITINFO2 that provides the "real"
+ * return code. E.g. if the PSC request was interrupted, the need to
+ * retry is communicated via SW_EXITINFO2, not SW_EXITINFO1.
+ */
+ svm_vmgexit_no_action(svm, psc_ret);
}
static void __snp_complete_one_psc(struct vcpu_svm *svm)
@@ -3847,110 +3842,90 @@ next_range:
BUG();
}
-static int __sev_snp_update_protected_guest_state(struct kvm_vcpu *vcpu)
+/*
+ * Invoked as part of svm_vcpu_reset() processing of an init event.
+ */
+void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ struct kvm_memory_slot *slot;
+ struct page *page;
+ kvm_pfn_t pfn;
+ gfn_t gfn;
- WARN_ON(!mutex_is_locked(&svm->sev_es.snp_vmsa_mutex));
+ if (!sev_snp_guest(vcpu->kvm))
+ return;
+
+ guard(mutex)(&svm->sev_es.snp_vmsa_mutex);
+
+ if (!svm->sev_es.snp_ap_waiting_for_reset)
+ return;
+
+ svm->sev_es.snp_ap_waiting_for_reset = false;
/* Mark the vCPU as offline and not runnable */
vcpu->arch.pv.pv_unhalted = false;
- vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
+ kvm_set_mp_state(vcpu, KVM_MP_STATE_HALTED);
/* Clear use of the VMSA */
svm->vmcb->control.vmsa_pa = INVALID_PAGE;
- if (VALID_PAGE(svm->sev_es.snp_vmsa_gpa)) {
- gfn_t gfn = gpa_to_gfn(svm->sev_es.snp_vmsa_gpa);
- struct kvm_memory_slot *slot;
- struct page *page;
- kvm_pfn_t pfn;
-
- slot = gfn_to_memslot(vcpu->kvm, gfn);
- if (!slot)
- return -EINVAL;
-
- /*
- * The new VMSA will be private memory guest memory, so
- * retrieve the PFN from the gmem backend.
- */
- if (kvm_gmem_get_pfn(vcpu->kvm, slot, gfn, &pfn, &page, NULL))
- return -EINVAL;
-
- /*
- * From this point forward, the VMSA will always be a
- * guest-mapped page rather than the initial one allocated
- * by KVM in svm->sev_es.vmsa. In theory, svm->sev_es.vmsa
- * could be free'd and cleaned up here, but that involves
- * cleanups like wbinvd_on_all_cpus() which would ideally
- * be handled during teardown rather than guest boot.
- * Deferring that also allows the existing logic for SEV-ES
- * VMSAs to be re-used with minimal SNP-specific changes.
- */
- svm->sev_es.snp_has_guest_vmsa = true;
-
- /* Use the new VMSA */
- svm->vmcb->control.vmsa_pa = pfn_to_hpa(pfn);
-
- /* Mark the vCPU as runnable */
- vcpu->arch.pv.pv_unhalted = false;
- vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-
- svm->sev_es.snp_vmsa_gpa = INVALID_PAGE;
-
- /*
- * gmem pages aren't currently migratable, but if this ever
- * changes then care should be taken to ensure
- * svm->sev_es.vmsa is pinned through some other means.
- */
- kvm_release_page_clean(page);
- }
-
/*
* When replacing the VMSA during SEV-SNP AP creation,
* mark the VMCB dirty so that full state is always reloaded.
*/
vmcb_mark_all_dirty(svm->vmcb);
- return 0;
-}
+ if (!VALID_PAGE(svm->sev_es.snp_vmsa_gpa))
+ return;
-/*
- * Invoked as part of svm_vcpu_reset() processing of an init event.
- */
-void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu)
-{
- struct vcpu_svm *svm = to_svm(vcpu);
- int ret;
+ gfn = gpa_to_gfn(svm->sev_es.snp_vmsa_gpa);
+ svm->sev_es.snp_vmsa_gpa = INVALID_PAGE;
- if (!sev_snp_guest(vcpu->kvm))
+ slot = gfn_to_memslot(vcpu->kvm, gfn);
+ if (!slot)
return;
- mutex_lock(&svm->sev_es.snp_vmsa_mutex);
+ /*
+ * The new VMSA will be private memory guest memory, so retrieve the
+ * PFN from the gmem backend.
+ */
+ if (kvm_gmem_get_pfn(vcpu->kvm, slot, gfn, &pfn, &page, NULL))
+ return;
- if (!svm->sev_es.snp_ap_waiting_for_reset)
- goto unlock;
+ /*
+ * From this point forward, the VMSA will always be a guest-mapped page
+ * rather than the initial one allocated by KVM in svm->sev_es.vmsa. In
+ * theory, svm->sev_es.vmsa could be free'd and cleaned up here, but
+ * that involves cleanups like wbinvd_on_all_cpus() which would ideally
+ * be handled during teardown rather than guest boot. Deferring that
+ * also allows the existing logic for SEV-ES VMSAs to be re-used with
+ * minimal SNP-specific changes.
+ */
+ svm->sev_es.snp_has_guest_vmsa = true;
- svm->sev_es.snp_ap_waiting_for_reset = false;
+ /* Use the new VMSA */
+ svm->vmcb->control.vmsa_pa = pfn_to_hpa(pfn);
- ret = __sev_snp_update_protected_guest_state(vcpu);
- if (ret)
- vcpu_unimpl(vcpu, "snp: AP state update on init failed\n");
+ /* Mark the vCPU as runnable */
+ kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
-unlock:
- mutex_unlock(&svm->sev_es.snp_vmsa_mutex);
+ /*
+ * gmem pages aren't currently migratable, but if this ever changes
+ * then care should be taken to ensure svm->sev_es.vmsa is pinned
+ * through some other means.
+ */
+ kvm_release_page_clean(page);
}
static int sev_snp_ap_creation(struct vcpu_svm *svm)
{
- struct kvm_sev_info *sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(svm->vcpu.kvm);
struct kvm_vcpu *vcpu = &svm->vcpu;
struct kvm_vcpu *target_vcpu;
struct vcpu_svm *target_svm;
unsigned int request;
unsigned int apic_id;
- bool kick;
- int ret;
request = lower_32_bits(svm->vmcb->control.exit_info_1);
apic_id = upper_32_bits(svm->vmcb->control.exit_info_1);
@@ -3963,47 +3938,23 @@ static int sev_snp_ap_creation(struct vcpu_svm *svm)
return -EINVAL;
}
- ret = 0;
-
target_svm = to_svm(target_vcpu);
- /*
- * The target vCPU is valid, so the vCPU will be kicked unless the
- * request is for CREATE_ON_INIT. For any errors at this stage, the
- * kick will place the vCPU in an non-runnable state.
- */
- kick = true;
-
- mutex_lock(&target_svm->sev_es.snp_vmsa_mutex);
-
- target_svm->sev_es.snp_vmsa_gpa = INVALID_PAGE;
- target_svm->sev_es.snp_ap_waiting_for_reset = true;
-
- /* Interrupt injection mode shouldn't change for AP creation */
- if (request < SVM_VMGEXIT_AP_DESTROY) {
- u64 sev_features;
-
- sev_features = vcpu->arch.regs[VCPU_REGS_RAX];
- sev_features ^= sev->vmsa_features;
-
- if (sev_features & SVM_SEV_FEAT_INT_INJ_MODES) {
- vcpu_unimpl(vcpu, "vmgexit: invalid AP injection mode [%#lx] from guest\n",
- vcpu->arch.regs[VCPU_REGS_RAX]);
- ret = -EINVAL;
- goto out;
- }
- }
+ guard(mutex)(&target_svm->sev_es.snp_vmsa_mutex);
switch (request) {
case SVM_VMGEXIT_AP_CREATE_ON_INIT:
- kick = false;
- fallthrough;
case SVM_VMGEXIT_AP_CREATE:
+ if (vcpu->arch.regs[VCPU_REGS_RAX] != sev->vmsa_features) {
+ vcpu_unimpl(vcpu, "vmgexit: mismatched AP sev_features [%#lx] != [%#llx] from guest\n",
+ vcpu->arch.regs[VCPU_REGS_RAX], sev->vmsa_features);
+ return -EINVAL;
+ }
+
if (!page_address_valid(vcpu, svm->vmcb->control.exit_info_2)) {
vcpu_unimpl(vcpu, "vmgexit: invalid AP VMSA address [%#llx] from guest\n",
svm->vmcb->control.exit_info_2);
- ret = -EINVAL;
- goto out;
+ return -EINVAL;
}
/*
@@ -4017,30 +3968,32 @@ static int sev_snp_ap_creation(struct vcpu_svm *svm)
vcpu_unimpl(vcpu,
"vmgexit: AP VMSA address [%llx] from guest is unsafe as it is 2M aligned\n",
svm->vmcb->control.exit_info_2);
- ret = -EINVAL;
- goto out;
+ return -EINVAL;
}
target_svm->sev_es.snp_vmsa_gpa = svm->vmcb->control.exit_info_2;
break;
case SVM_VMGEXIT_AP_DESTROY:
+ target_svm->sev_es.snp_vmsa_gpa = INVALID_PAGE;
break;
default:
vcpu_unimpl(vcpu, "vmgexit: invalid AP creation request [%#x] from guest\n",
request);
- ret = -EINVAL;
- break;
+ return -EINVAL;
}
-out:
- if (kick) {
+ target_svm->sev_es.snp_ap_waiting_for_reset = true;
+
+ /*
+ * Unless Creation is deferred until INIT, signal the vCPU to update
+ * its state.
+ */
+ if (request != SVM_VMGEXIT_AP_CREATE_ON_INIT) {
kvm_make_request(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, target_vcpu);
kvm_vcpu_kick(target_vcpu);
}
- mutex_unlock(&target_svm->sev_es.snp_vmsa_mutex);
-
- return ret;
+ return 0;
}
static int snp_handle_guest_req(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t resp_gpa)
@@ -4079,7 +4032,8 @@ static int snp_handle_guest_req(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t resp_
goto out_unlock;
}
- ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, SNP_GUEST_ERR(0, fw_err));
+ /* No action is requested *from KVM* if there was a firmware error. */
+ svm_vmgexit_no_action(svm, SNP_GUEST_ERR(0, fw_err));
ret = 1; /* resume guest */
@@ -4135,8 +4089,7 @@ static int snp_handle_ext_guest_req(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t r
return snp_handle_guest_req(svm, req_gpa, resp_gpa);
request_invalid:
- ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 2);
- ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, GHCB_ERR_INVALID_INPUT);
+ svm_vmgexit_bad_input(svm, GHCB_ERR_INVALID_INPUT);
return 1; /* resume guest */
}
@@ -4144,7 +4097,7 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
{
struct vmcb_control_area *control = &svm->vmcb->control;
struct kvm_vcpu *vcpu = &svm->vcpu;
- struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(vcpu->kvm);
u64 ghcb_info;
int ret = 1;
@@ -4328,8 +4281,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
if (ret)
return ret;
- ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 0);
- ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 0);
+ svm_vmgexit_success(svm, 0);
exit_code = kvm_ghcb_get_sw_exit_code(control);
switch (exit_code) {
@@ -4364,7 +4316,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
ret = kvm_emulate_ap_reset_hold(vcpu);
break;
case SVM_VMGEXIT_AP_JUMP_TABLE: {
- struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(vcpu->kvm);
switch (control->exit_info_1) {
case 0:
@@ -4373,21 +4325,19 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
break;
case 1:
/* Get AP jump table address */
- ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, sev->ap_jump_table);
+ svm_vmgexit_success(svm, sev->ap_jump_table);
break;
default:
pr_err("svm: vmgexit: unsupported AP jump table request - exit_info_1=%#llx\n",
control->exit_info_1);
- ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 2);
- ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, GHCB_ERR_INVALID_INPUT);
+ svm_vmgexit_bad_input(svm, GHCB_ERR_INVALID_INPUT);
}
ret = 1;
break;
}
case SVM_VMGEXIT_HV_FEATURES:
- ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, GHCB_HV_FT_SUPPORTED);
-
+ svm_vmgexit_success(svm, GHCB_HV_FT_SUPPORTED);
ret = 1;
break;
case SVM_VMGEXIT_TERM_REQUEST:
@@ -4408,8 +4358,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
case SVM_VMGEXIT_AP_CREATION:
ret = sev_snp_ap_creation(svm);
if (ret) {
- ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 2);
- ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, GHCB_ERR_INVALID_INPUT);
+ svm_vmgexit_bad_input(svm, GHCB_ERR_INVALID_INPUT);
}
ret = 1;
@@ -4575,7 +4524,7 @@ void sev_init_vmcb(struct vcpu_svm *svm)
void sev_es_vcpu_reset(struct vcpu_svm *svm)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
- struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(vcpu->kvm);
/*
* Set the GHCB MSR value as per the GHCB specification when emulating
@@ -4655,7 +4604,7 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
* Return from an AP Reset Hold VMGEXIT, where the guest will
* set the CS and RIP. Set SW_EXIT_INFO_2 to a non-zero value.
*/
- ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 1);
+ svm_vmgexit_success(svm, 1);
break;
case AP_RESET_HOLD_MSR_PROTO:
/*
@@ -4853,7 +4802,7 @@ static bool is_large_rmp_possible(struct kvm *kvm, kvm_pfn_t pfn, int order)
int sev_gmem_prepare(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
kvm_pfn_t pfn_aligned;
gfn_t gfn_aligned;
int level, rc;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index e67de787fc71..8abeab91d329 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1297,8 +1297,12 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
svm_set_intercept(svm, INTERCEPT_MWAIT);
}
- if (!kvm_hlt_in_guest(vcpu->kvm))
- svm_set_intercept(svm, INTERCEPT_HLT);
+ if (!kvm_hlt_in_guest(vcpu->kvm)) {
+ if (cpu_feature_enabled(X86_FEATURE_IDLE_HLT))
+ svm_set_intercept(svm, INTERCEPT_IDLE_HLT);
+ else
+ svm_set_intercept(svm, INTERCEPT_HLT);
+ }
control->iopm_base_pa = iopm_base;
control->msrpm_base_pa = __sme_set(__pa(svm->msrpm));
@@ -1932,7 +1936,7 @@ void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
vmcb_mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE))
- kvm_update_cpuid_runtime(vcpu);
+ vcpu->arch.cpuid_dynamic_bits_dirty = true;
}
static void svm_set_segment(struct kvm_vcpu *vcpu,
@@ -2973,11 +2977,7 @@ static int svm_complete_emulated_msr(struct kvm_vcpu *vcpu, int err)
if (!err || !sev_es_guest(vcpu->kvm) || WARN_ON_ONCE(!svm->sev_es.ghcb))
return kvm_complete_insn_gp(vcpu, err);
- ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 1);
- ghcb_set_sw_exit_info_2(svm->sev_es.ghcb,
- X86_TRAP_GP |
- SVM_EVTINJ_TYPE_EXEPT |
- SVM_EVTINJ_VALID);
+ svm_vmgexit_inject_exception(svm, X86_TRAP_GP);
return 1;
}
@@ -3293,6 +3293,17 @@ static int invpcid_interception(struct kvm_vcpu *vcpu)
type = svm->vmcb->control.exit_info_2;
gva = svm->vmcb->control.exit_info_1;
+ /*
+ * FIXME: Perform segment checks for 32-bit mode, and inject #SS if the
+ * stack segment is used. The intercept takes priority over all
+ * #GP checks except CPL>0, but somehow still generates a linear
+ * address? The APM is sorely lacking.
+ */
+ if (is_noncanonical_address(gva, vcpu, 0)) {
+ kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
+ return 1;
+ }
+
return kvm_handle_invpcid(vcpu, type, gva);
}
@@ -3363,6 +3374,7 @@ static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[SVM_EXIT_CR4_WRITE_TRAP] = cr_trap,
[SVM_EXIT_CR8_WRITE_TRAP] = cr_trap,
[SVM_EXIT_INVPCID] = invpcid_interception,
+ [SVM_EXIT_IDLE_HLT] = kvm_emulate_halt,
[SVM_EXIT_NPF] = npf_interception,
[SVM_EXIT_RSM] = rsm_interception,
[SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception,
@@ -3525,7 +3537,7 @@ int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code)
return interrupt_window_interception(vcpu);
else if (exit_code == SVM_EXIT_INTR)
return intr_interception(vcpu);
- else if (exit_code == SVM_EXIT_HLT)
+ else if (exit_code == SVM_EXIT_HLT || exit_code == SVM_EXIT_IDLE_HLT)
return kvm_emulate_halt(vcpu);
else if (exit_code == SVM_EXIT_NPF)
return npf_interception(vcpu);
@@ -3608,7 +3620,7 @@ static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
return svm_invoke_exit_handler(vcpu, exit_code);
}
-static void pre_svm_run(struct kvm_vcpu *vcpu)
+static int pre_svm_run(struct kvm_vcpu *vcpu)
{
struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu);
struct vcpu_svm *svm = to_svm(vcpu);
@@ -3630,6 +3642,8 @@ static void pre_svm_run(struct kvm_vcpu *vcpu)
/* FIXME: handle wraparound of asid_generation */
if (svm->current_vmcb->asid_generation != sd->asid_generation)
new_asid(svm, sd);
+
+ return 0;
}
static void svm_inject_nmi(struct kvm_vcpu *vcpu)
@@ -4137,20 +4151,23 @@ static void svm_complete_interrupts(struct kvm_vcpu *vcpu)
vcpu->arch.nmi_injected = true;
svm->nmi_l1_to_l2 = nmi_l1_to_l2;
break;
- case SVM_EXITINTINFO_TYPE_EXEPT:
+ case SVM_EXITINTINFO_TYPE_EXEPT: {
+ u32 error_code = 0;
+
/*
* Never re-inject a #VC exception.
*/
if (vector == X86_TRAP_VC)
break;
- if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
- u32 err = svm->vmcb->control.exit_int_info_err;
- kvm_requeue_exception_e(vcpu, vector, err);
+ if (exitintinfo & SVM_EXITINTINFO_VALID_ERR)
+ error_code = svm->vmcb->control.exit_int_info_err;
- } else
- kvm_requeue_exception(vcpu, vector);
+ kvm_requeue_exception(vcpu, vector,
+ exitintinfo & SVM_EXITINTINFO_VALID_ERR,
+ error_code);
break;
+ }
case SVM_EXITINTINFO_TYPE_INTR:
kvm_queue_interrupt(vcpu, vector, false);
break;
@@ -4266,7 +4283,12 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu,
if (force_immediate_exit)
smp_send_reschedule(vcpu->cpu);
- pre_svm_run(vcpu);
+ if (pre_svm_run(vcpu)) {
+ vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+ vcpu->run->fail_entry.hardware_entry_failure_reason = SVM_EXIT_ERR;
+ vcpu->run->fail_entry.cpu = vcpu->cpu;
+ return EXIT_FASTPATH_EXIT_USERSPACE;
+ }
sync_lapic_to_cr8(vcpu);
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index ea44c1da5a7c..d4490eaed55d 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -361,20 +361,18 @@ static __always_inline struct kvm_sev_info *to_kvm_sev_info(struct kvm *kvm)
#ifdef CONFIG_KVM_AMD_SEV
static __always_inline bool sev_guest(struct kvm *kvm)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
-
- return sev->active;
+ return to_kvm_sev_info(kvm)->active;
}
static __always_inline bool sev_es_guest(struct kvm *kvm)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
return sev->es_active && !WARN_ON_ONCE(!sev->active);
}
static __always_inline bool sev_snp_guest(struct kvm *kvm)
{
- struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
return (sev->vmsa_features & SVM_SEV_FEAT_SNP_ACTIVE) &&
!WARN_ON_ONCE(!sev_es_guest(kvm));
@@ -581,6 +579,35 @@ static inline bool is_vnmi_enabled(struct vcpu_svm *svm)
return false;
}
+static inline void svm_vmgexit_set_return_code(struct vcpu_svm *svm,
+ u64 response, u64 data)
+{
+ ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, response);
+ ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, data);
+}
+
+static inline void svm_vmgexit_inject_exception(struct vcpu_svm *svm, u8 vector)
+{
+ u64 data = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT | vector;
+
+ svm_vmgexit_set_return_code(svm, GHCB_HV_RESP_ISSUE_EXCEPTION, data);
+}
+
+static inline void svm_vmgexit_bad_input(struct vcpu_svm *svm, u64 suberror)
+{
+ svm_vmgexit_set_return_code(svm, GHCB_HV_RESP_MALFORMED_INPUT, suberror);
+}
+
+static inline void svm_vmgexit_success(struct vcpu_svm *svm, u64 data)
+{
+ svm_vmgexit_set_return_code(svm, GHCB_HV_RESP_NO_ACTION, data);
+}
+
+static inline void svm_vmgexit_no_action(struct vcpu_svm *svm, u64 data)
+{
+ svm_vmgexit_set_return_code(svm, GHCB_HV_RESP_NO_ACTION, data);
+}
+
/* svm.c */
#define MSR_INVALID 0xffffffffU
@@ -715,7 +742,7 @@ void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu);
/* sev.c */
-void pre_sev_run(struct vcpu_svm *svm, int cpu);
+int pre_sev_run(struct vcpu_svm *svm, int cpu);
void sev_init_vmcb(struct vcpu_svm *svm);
void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm);
int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in);
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 0b844cb97978..ccda95e53f62 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -830,12 +830,12 @@ TRACE_EVENT(kvm_emulate_insn,
TP_ARGS(vcpu, failed),
TP_STRUCT__entry(
- __field( __u64, rip )
- __field( __u32, csbase )
- __field( __u8, len )
- __array( __u8, insn, 15 )
- __field( __u8, flags )
- __field( __u8, failed )
+ __field( __u64, rip )
+ __field( __u32, csbase )
+ __field( __u8, len )
+ __array( __u8, insn, X86_MAX_INSTRUCTION_LENGTH )
+ __field( __u8, flags )
+ __field( __u8, failed )
),
TP_fast_assign(
@@ -846,7 +846,7 @@ TRACE_EVENT(kvm_emulate_insn,
__entry->rip = vcpu->arch.emulate_ctxt->_eip - __entry->len;
memcpy(__entry->insn,
vcpu->arch.emulate_ctxt->fetch.data,
- 15);
+ X86_MAX_INSTRUCTION_LENGTH);
__entry->flags = kei_decode_mode(vcpu->arch.emulate_ctxt->mode);
__entry->failed = failed;
),
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index ed8a3cb53961..d06e50d9c0e7 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2970,7 +2970,7 @@ static int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu,
case INTR_TYPE_SOFT_EXCEPTION:
case INTR_TYPE_SOFT_INTR:
case INTR_TYPE_PRIV_SW_EXCEPTION:
- if (CC(vmcs12->vm_entry_instruction_len > 15) ||
+ if (CC(vmcs12->vm_entry_instruction_len > X86_MAX_INSTRUCTION_LENGTH) ||
CC(vmcs12->vm_entry_instruction_len == 0 &&
CC(!nested_cpu_has_zero_length_injection(vcpu))))
return -EINVAL;
@@ -3771,7 +3771,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
break;
case GUEST_ACTIVITY_WAIT_SIPI:
vmx->nested.nested_run_pending = 0;
- vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
+ kvm_set_mp_state(vcpu, KVM_MP_STATE_INIT_RECEIVED);
break;
default:
break;
@@ -4618,7 +4618,7 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
*/
static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
u32 vm_exit_reason, u32 exit_intr_info,
- unsigned long exit_qualification)
+ unsigned long exit_qualification, u32 exit_insn_len)
{
/* update exit information fields: */
vmcs12->vm_exit_reason = vm_exit_reason;
@@ -4646,7 +4646,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vm_exit_reason, exit_intr_info);
vmcs12->vm_exit_intr_info = exit_intr_info;
- vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+ vmcs12->vm_exit_instruction_len = exit_insn_len;
vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
/*
@@ -4930,8 +4930,9 @@ vmabort:
* and modify vmcs12 to make it see what it would expect to see there if
* L2 was its real guest. Must only be called when in L2 (is_guest_mode())
*/
-void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
- u32 exit_intr_info, unsigned long exit_qualification)
+void __nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
+ u32 exit_intr_info, unsigned long exit_qualification,
+ u32 exit_insn_len)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
@@ -4981,7 +4982,8 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
if (vm_exit_reason != -1)
prepare_vmcs12(vcpu, vmcs12, vm_exit_reason,
- exit_intr_info, exit_qualification);
+ exit_intr_info, exit_qualification,
+ exit_insn_len);
/*
* Must happen outside of sync_vmcs02_to_vmcs12() as it will
@@ -5071,7 +5073,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
vmx->nested.need_vmcs12_to_shadow_sync = true;
/* in case we halted in L2 */
- vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+ kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
if (likely(!vmx->fail)) {
if (vm_exit_reason != -1)
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index 2c296b6abb8c..6eedcfc91070 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -26,8 +26,26 @@ void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu);
enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
bool from_vmentry);
bool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu);
-void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
- u32 exit_intr_info, unsigned long exit_qualification);
+void __nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
+ u32 exit_intr_info, unsigned long exit_qualification,
+ u32 exit_insn_len);
+
+static inline void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
+ u32 exit_intr_info,
+ unsigned long exit_qualification)
+{
+ u32 exit_insn_len;
+
+ if (to_vmx(vcpu)->fail || vm_exit_reason == -1 ||
+ (vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
+ exit_insn_len = 0;
+ else
+ exit_insn_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+
+ __nested_vmx_vmexit(vcpu, vm_exit_reason, exit_intr_info,
+ exit_qualification, exit_insn_len);
+}
+
void nested_sync_vmcs12_to_shadow(struct kvm_vcpu *vcpu);
int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata);
diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
index ec08fa3caf43..51116fe69a50 100644
--- a/arch/x86/kvm/vmx/posted_intr.c
+++ b/arch/x86/kvm/vmx/posted_intr.c
@@ -31,6 +31,8 @@ static DEFINE_PER_CPU(struct list_head, wakeup_vcpus_on_cpu);
*/
static DEFINE_PER_CPU(raw_spinlock_t, wakeup_vcpus_on_cpu_lock);
+#define PI_LOCK_SCHED_OUT SINGLE_DEPTH_NESTING
+
static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
{
return &(to_vmx(vcpu)->pi_desc);
@@ -89,9 +91,20 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
* current pCPU if the task was migrated.
*/
if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR) {
- raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
+ raw_spinlock_t *spinlock = &per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu);
+
+ /*
+ * In addition to taking the wakeup lock for the regular/IRQ
+ * context, tell lockdep it is being taken for the "sched out"
+ * context as well. vCPU loads happens in task context, and
+ * this is taking the lock of the *previous* CPU, i.e. can race
+ * with both the scheduler and the wakeup handler.
+ */
+ raw_spin_lock(spinlock);
+ spin_acquire(&spinlock->dep_map, PI_LOCK_SCHED_OUT, 0, _RET_IP_);
list_del(&vmx->pi_wakeup_list);
- raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
+ spin_release(&spinlock->dep_map, _RET_IP_);
+ raw_spin_unlock(spinlock);
}
dest = cpu_physical_id(cpu);
@@ -148,11 +161,23 @@ static void pi_enable_wakeup_handler(struct kvm_vcpu *vcpu)
struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct pi_desc old, new;
- unsigned long flags;
- local_irq_save(flags);
+ lockdep_assert_irqs_disabled();
- raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
+ /*
+ * Acquire the wakeup lock using the "sched out" context to workaround
+ * a lockdep false positive. When this is called, schedule() holds
+ * various per-CPU scheduler locks. When the wakeup handler runs, it
+ * holds this CPU's wakeup lock while calling try_to_wake_up(), which
+ * can eventually take the aforementioned scheduler locks, which causes
+ * lockdep to assume there is deadlock.
+ *
+ * Deadlock can't actually occur because IRQs are disabled for the
+ * entirety of the sched_out critical section, i.e. the wakeup handler
+ * can't run while the scheduler locks are held.
+ */
+ raw_spin_lock_nested(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu),
+ PI_LOCK_SCHED_OUT);
list_add_tail(&vmx->pi_wakeup_list,
&per_cpu(wakeup_vcpus_on_cpu, vcpu->cpu));
raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
@@ -176,8 +201,6 @@ static void pi_enable_wakeup_handler(struct kvm_vcpu *vcpu)
*/
if (pi_test_on(&new))
__apic_send_IPI_self(POSTED_INTR_WAKEUP_VECTOR);
-
- local_irq_restore(flags);
}
static bool vmx_needs_pi_wakeup(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 3b92f893b239..b70ed72c1783 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2578,6 +2578,34 @@ static u64 adjust_vmx_controls64(u64 ctl_opt, u32 msr)
return ctl_opt & allowed;
}
+#define vmx_check_entry_exit_pairs(pairs, entry_controls, exit_controls) \
+({ \
+ int i, r = 0; \
+ \
+ BUILD_BUG_ON(sizeof(pairs[0].entry_control) != sizeof(entry_controls)); \
+ BUILD_BUG_ON(sizeof(pairs[0].exit_control) != sizeof(exit_controls)); \
+ \
+ for (i = 0; i < ARRAY_SIZE(pairs); i++) { \
+ typeof(entry_controls) n_ctrl = pairs[i].entry_control; \
+ typeof(exit_controls) x_ctrl = pairs[i].exit_control; \
+ \
+ if (!(entry_controls & n_ctrl) == !(exit_controls & x_ctrl)) \
+ continue; \
+ \
+ pr_warn_once("Inconsistent VM-Entry/VM-Exit pair, " \
+ "entry = %llx (%llx), exit = %llx (%llx)\n", \
+ (u64)(entry_controls & n_ctrl), (u64)n_ctrl, \
+ (u64)(exit_controls & x_ctrl), (u64)x_ctrl); \
+ \
+ if (error_on_inconsistent_vmcs_config) \
+ r = -EIO; \
+ \
+ entry_controls &= ~n_ctrl; \
+ exit_controls &= ~x_ctrl; \
+ } \
+ r; \
+})
+
static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
struct vmx_capability *vmx_cap)
{
@@ -2589,7 +2617,6 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
u32 _vmentry_control = 0;
u64 basic_msr;
u64 misc_msr;
- int i;
/*
* LOAD/SAVE_DEBUG_CONTROLS are absent because both are mandatory.
@@ -2693,22 +2720,9 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
&_vmentry_control))
return -EIO;
- for (i = 0; i < ARRAY_SIZE(vmcs_entry_exit_pairs); i++) {
- u32 n_ctrl = vmcs_entry_exit_pairs[i].entry_control;
- u32 x_ctrl = vmcs_entry_exit_pairs[i].exit_control;
-
- if (!(_vmentry_control & n_ctrl) == !(_vmexit_control & x_ctrl))
- continue;
-
- pr_warn_once("Inconsistent VM-Entry/VM-Exit pair, entry = %x, exit = %x\n",
- _vmentry_control & n_ctrl, _vmexit_control & x_ctrl);
-
- if (error_on_inconsistent_vmcs_config)
- return -EIO;
-
- _vmentry_control &= ~n_ctrl;
- _vmexit_control &= ~x_ctrl;
- }
+ if (vmx_check_entry_exit_pairs(vmcs_entry_exit_pairs,
+ _vmentry_control, _vmexit_control))
+ return -EIO;
/*
* Some cpus support VM_{ENTRY,EXIT}_IA32_PERF_GLOBAL_CTRL but they
@@ -3519,7 +3533,7 @@ void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
vmcs_writel(GUEST_CR4, hw_cr4);
if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE))
- kvm_update_cpuid_runtime(vcpu);
+ vcpu->arch.cpuid_dynamic_bits_dirty = true;
}
void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
@@ -5211,6 +5225,12 @@ bool vmx_guest_inject_ac(struct kvm_vcpu *vcpu)
(kvm_get_rflags(vcpu) & X86_EFLAGS_AC);
}
+static bool is_xfd_nm_fault(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.guest_fpu.fpstate->xfd &&
+ !kvm_is_cr0_bit_set(vcpu, X86_CR0_TS);
+}
+
static int handle_exception_nmi(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -5237,7 +5257,8 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
* point.
*/
if (is_nm_fault(intr_info)) {
- kvm_queue_exception(vcpu, NM_VECTOR);
+ kvm_queue_exception_p(vcpu, NM_VECTOR,
+ is_xfd_nm_fault(vcpu) ? vcpu->arch.guest_fpu.xfd_err : 0);
return 1;
}
@@ -5817,7 +5838,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
error_code |= (exit_qualification & EPT_VIOLATION_ACC_INSTR)
? PFERR_FETCH_MASK : 0;
/* ept page table entry is present? */
- error_code |= (exit_qualification & EPT_VIOLATION_RWX_MASK)
+ error_code |= (exit_qualification & EPT_VIOLATION_PROT_MASK)
? PFERR_PRESENT_MASK : 0;
if (error_code & EPT_VIOLATION_GVA_IS_VALID)
@@ -5871,11 +5892,35 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu)
return 1;
}
-static bool vmx_emulation_required_with_pending_exception(struct kvm_vcpu *vcpu)
+/*
+ * Returns true if emulation is required (due to the vCPU having invalid state
+ * with unsrestricted guest mode disabled) and KVM can't faithfully emulate the
+ * current vCPU state.
+ */
+static bool vmx_unhandleable_emulation_required(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- return vmx->emulation_required && !vmx->rmode.vm86_active &&
+ if (!vmx->emulation_required)
+ return false;
+
+ /*
+ * It is architecturally impossible for emulation to be required when a
+ * nested VM-Enter is pending completion, as VM-Enter will VM-Fail if
+ * guest state is invalid and unrestricted guest is disabled, i.e. KVM
+ * should synthesize VM-Fail instead emulation L2 code. This path is
+ * only reachable if userspace modifies L2 guest state after KVM has
+ * performed the nested VM-Enter consistency checks.
+ */
+ if (vmx->nested.nested_run_pending)
+ return true;
+
+ /*
+ * KVM only supports emulating exceptions if the vCPU is in Real Mode.
+ * If emulation is required, KVM can't perform a successful VM-Enter to
+ * inject the exception.
+ */
+ return !vmx->rmode.vm86_active &&
(kvm_is_exception_pending(vcpu) || vcpu->arch.exception.injected);
}
@@ -5898,7 +5943,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
if (!kvm_emulate_instruction(vcpu, 0))
return 0;
- if (vmx_emulation_required_with_pending_exception(vcpu)) {
+ if (vmx_unhandleable_emulation_required(vcpu)) {
kvm_prepare_emulation_failure_exit(vcpu);
return 0;
}
@@ -5922,7 +5967,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
int vmx_vcpu_pre_run(struct kvm_vcpu *vcpu)
{
- if (vmx_emulation_required_with_pending_exception(vcpu)) {
+ if (vmx_unhandleable_emulation_required(vcpu)) {
kvm_prepare_emulation_failure_exit(vcpu);
return 0;
}
@@ -6997,16 +7042,15 @@ static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
* MSR value is not clobbered by the host activity before the guest
* has chance to consume it.
*
- * Do not blindly read xfd_err here, since this exception might
- * be caused by L1 interception on a platform which doesn't
- * support xfd at all.
+ * Update the guest's XFD_ERR if and only if XFD is enabled, as the #NM
+ * interception may have been caused by L1 interception. Per the SDM,
+ * XFD_ERR is not modified for non-XFD #NM, i.e. if CR0.TS=1.
*
- * Do it conditionally upon guest_fpu::xfd. xfd_err matters
- * only when xfd contains a non-zero value.
- *
- * Queuing exception is done in vmx_handle_exit. See comment there.
+ * Note, XFD_ERR is updated _before_ the #NM interception check, i.e.
+ * unlike CR2 and DR6, the value is not a payload that is attached to
+ * the #NM exception.
*/
- if (vcpu->arch.guest_fpu.fpstate->xfd)
+ if (is_xfd_nm_fault(vcpu))
rdmsrl(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err);
}
@@ -7157,13 +7201,17 @@ static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
case INTR_TYPE_SOFT_EXCEPTION:
vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
fallthrough;
- case INTR_TYPE_HARD_EXCEPTION:
- if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
- u32 err = vmcs_read32(error_code_field);
- kvm_requeue_exception_e(vcpu, vector, err);
- } else
- kvm_requeue_exception(vcpu, vector);
+ case INTR_TYPE_HARD_EXCEPTION: {
+ u32 error_code = 0;
+
+ if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK)
+ error_code = vmcs_read32(error_code_field);
+
+ kvm_requeue_exception(vcpu, vector,
+ idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK,
+ error_code);
break;
+ }
case INTR_TYPE_SOFT_INTR:
vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
fallthrough;
@@ -8005,38 +8053,50 @@ static __init void vmx_set_cpu_caps(void)
kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);
}
-static int vmx_check_intercept_io(struct kvm_vcpu *vcpu,
- struct x86_instruction_info *info)
+static bool vmx_is_io_intercepted(struct kvm_vcpu *vcpu,
+ struct x86_instruction_info *info,
+ unsigned long *exit_qualification)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
unsigned short port;
- bool intercept;
int size;
+ bool imm;
+
+ /*
+ * If the 'use IO bitmaps' VM-execution control is 0, IO instruction
+ * VM-exits depend on the 'unconditional IO exiting' VM-execution
+ * control.
+ *
+ * Otherwise, IO instruction VM-exits are controlled by the IO bitmaps.
+ */
+ if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
+ return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
if (info->intercept == x86_intercept_in ||
info->intercept == x86_intercept_ins) {
port = info->src_val;
size = info->dst_bytes;
+ imm = info->src_type == OP_IMM;
} else {
port = info->dst_val;
size = info->src_bytes;
+ imm = info->dst_type == OP_IMM;
}
- /*
- * If the 'use IO bitmaps' VM-execution control is 0, IO instruction
- * VM-exits depend on the 'unconditional IO exiting' VM-execution
- * control.
- *
- * Otherwise, IO instruction VM-exits are controlled by the IO bitmaps.
- */
- if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
- intercept = nested_cpu_has(vmcs12,
- CPU_BASED_UNCOND_IO_EXITING);
- else
- intercept = nested_vmx_check_io_bitmaps(vcpu, port, size);
- /* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED. */
- return intercept ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE;
+ *exit_qualification = ((unsigned long)port << 16) | (size - 1);
+
+ if (info->intercept == x86_intercept_ins ||
+ info->intercept == x86_intercept_outs)
+ *exit_qualification |= BIT(4);
+
+ if (info->rep_prefix)
+ *exit_qualification |= BIT(5);
+
+ if (imm)
+ *exit_qualification |= BIT(6);
+
+ return nested_vmx_check_io_bitmaps(vcpu, port, size);
}
int vmx_check_intercept(struct kvm_vcpu *vcpu,
@@ -8045,26 +8105,34 @@ int vmx_check_intercept(struct kvm_vcpu *vcpu,
struct x86_exception *exception)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+ unsigned long exit_qualification = 0;
+ u32 vm_exit_reason;
+ u64 exit_insn_len;
switch (info->intercept) {
- /*
- * RDPID causes #UD if disabled through secondary execution controls.
- * Because it is marked as EmulateOnUD, we need to intercept it here.
- * Note, RDPID is hidden behind ENABLE_RDTSCP.
- */
case x86_intercept_rdpid:
+ /*
+ * RDPID causes #UD if not enabled through secondary execution
+ * controls (ENABLE_RDTSCP). Note, the implicit MSR access to
+ * TSC_AUX is NOT subject to interception, i.e. checking only
+ * the dedicated execution control is architecturally correct.
+ */
if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_RDTSCP)) {
exception->vector = UD_VECTOR;
exception->error_code_valid = false;
return X86EMUL_PROPAGATE_FAULT;
}
- break;
+ return X86EMUL_CONTINUE;
case x86_intercept_in:
case x86_intercept_ins:
case x86_intercept_out:
case x86_intercept_outs:
- return vmx_check_intercept_io(vcpu, info);
+ if (!vmx_is_io_intercepted(vcpu, info, &exit_qualification))
+ return X86EMUL_CONTINUE;
+
+ vm_exit_reason = EXIT_REASON_IO_INSTRUCTION;
+ break;
case x86_intercept_lgdt:
case x86_intercept_lidt:
@@ -8077,7 +8145,24 @@ int vmx_check_intercept(struct kvm_vcpu *vcpu,
if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC))
return X86EMUL_CONTINUE;
- /* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED. */
+ if (info->intercept == x86_intercept_lldt ||
+ info->intercept == x86_intercept_ltr ||
+ info->intercept == x86_intercept_sldt ||
+ info->intercept == x86_intercept_str)
+ vm_exit_reason = EXIT_REASON_LDTR_TR;
+ else
+ vm_exit_reason = EXIT_REASON_GDTR_IDTR;
+ /*
+ * FIXME: Decode the ModR/M to generate the correct exit
+ * qualification for memory operands.
+ */
+ break;
+
+ case x86_intercept_hlt:
+ if (!nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING))
+ return X86EMUL_CONTINUE;
+
+ vm_exit_reason = EXIT_REASON_HLT;
break;
case x86_intercept_pause:
@@ -8090,17 +8175,24 @@ int vmx_check_intercept(struct kvm_vcpu *vcpu,
* the PAUSE.
*/
if ((info->rep_prefix != REPE_PREFIX) ||
- !nested_cpu_has2(vmcs12, CPU_BASED_PAUSE_EXITING))
+ !nested_cpu_has(vmcs12, CPU_BASED_PAUSE_EXITING))
return X86EMUL_CONTINUE;
+ vm_exit_reason = EXIT_REASON_PAUSE_INSTRUCTION;
break;
/* TODO: check more intercepts... */
default:
- break;
+ return X86EMUL_UNHANDLEABLE;
}
- return X86EMUL_UNHANDLEABLE;
+ exit_insn_len = abs_diff((s64)info->next_rip, (s64)info->rip);
+ if (!exit_insn_len || exit_insn_len > X86_MAX_INSTRUCTION_LENGTH)
+ return X86EMUL_UNHANDLEABLE;
+
+ __nested_vmx_vmexit(vcpu, vm_exit_reason, 0, exit_qualification,
+ exit_insn_len);
+ return X86EMUL_INTERCEPTED;
}
#ifdef CONFIG_X86_64
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4b64ab350bcd..3712dde0bf9d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -800,9 +800,9 @@ static void kvm_queue_exception_vmexit(struct kvm_vcpu *vcpu, unsigned int vecto
ex->payload = payload;
}
-static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
- unsigned nr, bool has_error, u32 error_code,
- bool has_payload, unsigned long payload, bool reinject)
+static void kvm_multiple_exception(struct kvm_vcpu *vcpu, unsigned int nr,
+ bool has_error, u32 error_code,
+ bool has_payload, unsigned long payload)
{
u32 prev_nr;
int class1, class2;
@@ -810,13 +810,10 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
kvm_make_request(KVM_REQ_EVENT, vcpu);
/*
- * If the exception is destined for L2 and isn't being reinjected,
- * morph it to a VM-Exit if L1 wants to intercept the exception. A
- * previously injected exception is not checked because it was checked
- * when it was original queued, and re-checking is incorrect if _L1_
- * injected the exception, in which case it's exempt from interception.
+ * If the exception is destined for L2, morph it to a VM-Exit if L1
+ * wants to intercept the exception.
*/
- if (!reinject && is_guest_mode(vcpu) &&
+ if (is_guest_mode(vcpu) &&
kvm_x86_ops.nested_ops->is_exception_vmexit(vcpu, nr, error_code)) {
kvm_queue_exception_vmexit(vcpu, nr, has_error, error_code,
has_payload, payload);
@@ -825,28 +822,9 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
if (!vcpu->arch.exception.pending && !vcpu->arch.exception.injected) {
queue:
- if (reinject) {
- /*
- * On VM-Entry, an exception can be pending if and only
- * if event injection was blocked by nested_run_pending.
- * In that case, however, vcpu_enter_guest() requests an
- * immediate exit, and the guest shouldn't proceed far
- * enough to need reinjection.
- */
- WARN_ON_ONCE(kvm_is_exception_pending(vcpu));
- vcpu->arch.exception.injected = true;
- if (WARN_ON_ONCE(has_payload)) {
- /*
- * A reinjected event has already
- * delivered its payload.
- */
- has_payload = false;
- payload = 0;
- }
- } else {
- vcpu->arch.exception.pending = true;
- vcpu->arch.exception.injected = false;
- }
+ vcpu->arch.exception.pending = true;
+ vcpu->arch.exception.injected = false;
+
vcpu->arch.exception.has_error_code = has_error;
vcpu->arch.exception.vector = nr;
vcpu->arch.exception.error_code = error_code;
@@ -887,30 +865,53 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
{
- kvm_multiple_exception(vcpu, nr, false, 0, false, 0, false);
+ kvm_multiple_exception(vcpu, nr, false, 0, false, 0);
}
EXPORT_SYMBOL_GPL(kvm_queue_exception);
-void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
-{
- kvm_multiple_exception(vcpu, nr, false, 0, false, 0, true);
-}
-EXPORT_SYMBOL_GPL(kvm_requeue_exception);
void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr,
unsigned long payload)
{
- kvm_multiple_exception(vcpu, nr, false, 0, true, payload, false);
+ kvm_multiple_exception(vcpu, nr, false, 0, true, payload);
}
EXPORT_SYMBOL_GPL(kvm_queue_exception_p);
static void kvm_queue_exception_e_p(struct kvm_vcpu *vcpu, unsigned nr,
u32 error_code, unsigned long payload)
{
- kvm_multiple_exception(vcpu, nr, true, error_code,
- true, payload, false);
+ kvm_multiple_exception(vcpu, nr, true, error_code, true, payload);
}
+void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned int nr,
+ bool has_error_code, u32 error_code)
+{
+
+ /*
+ * On VM-Entry, an exception can be pending if and only if event
+ * injection was blocked by nested_run_pending. In that case, however,
+ * vcpu_enter_guest() requests an immediate exit, and the guest
+ * shouldn't proceed far enough to need reinjection.
+ */
+ WARN_ON_ONCE(kvm_is_exception_pending(vcpu));
+
+ /*
+ * Do not check for interception when injecting an event for L2, as the
+ * exception was checked for intercept when it was original queued, and
+ * re-checking is incorrect if _L1_ injected the exception, in which
+ * case it's exempt from interception.
+ */
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+
+ vcpu->arch.exception.injected = true;
+ vcpu->arch.exception.has_error_code = has_error_code;
+ vcpu->arch.exception.vector = nr;
+ vcpu->arch.exception.error_code = error_code;
+ vcpu->arch.exception.has_payload = false;
+ vcpu->arch.exception.payload = 0;
+}
+EXPORT_SYMBOL_GPL(kvm_requeue_exception);
+
int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
{
if (err)
@@ -980,16 +981,10 @@ void kvm_inject_nmi(struct kvm_vcpu *vcpu)
void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
{
- kvm_multiple_exception(vcpu, nr, true, error_code, false, 0, false);
+ kvm_multiple_exception(vcpu, nr, true, error_code, false, 0);
}
EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
-void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
-{
- kvm_multiple_exception(vcpu, nr, true, error_code, false, 0, true);
-}
-EXPORT_SYMBOL_GPL(kvm_requeue_exception_e);
-
/*
* Checks if cpl <= required_cpl; if true, return true. Otherwise queue
* a #GP and return false.
@@ -1264,7 +1259,7 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
vcpu->arch.xcr0 = xcr0;
if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND)
- kvm_update_cpuid_runtime(vcpu);
+ vcpu->arch.cpuid_dynamic_bits_dirty = true;
return 0;
}
@@ -2080,10 +2075,20 @@ EXPORT_SYMBOL_GPL(kvm_handle_invalid_op);
static int kvm_emulate_monitor_mwait(struct kvm_vcpu *vcpu, const char *insn)
{
- if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS) &&
- !guest_cpu_cap_has(vcpu, X86_FEATURE_MWAIT))
+ bool enabled;
+
+ if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS))
+ goto emulate_as_nop;
+
+ if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT))
+ enabled = guest_cpu_cap_has(vcpu, X86_FEATURE_MWAIT);
+ else
+ enabled = vcpu->arch.ia32_misc_enable_msr & MSR_IA32_MISC_ENABLE_MWAIT;
+
+ if (!enabled)
return kvm_handle_invalid_op(vcpu);
+emulate_as_nop:
pr_warn_once("%s instruction emulated as NOP!\n", insn);
return kvm_emulate_as_nop(vcpu);
}
@@ -2569,6 +2574,9 @@ EXPORT_SYMBOL_GPL(kvm_calc_nested_tsc_multiplier);
static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 l1_offset)
{
+ if (vcpu->arch.guest_tsc_protected)
+ return;
+
trace_kvm_write_tsc_offset(vcpu->vcpu_id,
vcpu->arch.l1_tsc_offset,
l1_offset);
@@ -2626,12 +2634,18 @@ static inline bool kvm_check_tsc_unstable(void)
* participates in.
*/
static void __kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 offset, u64 tsc,
- u64 ns, bool matched)
+ u64 ns, bool matched, bool user_set_tsc)
{
struct kvm *kvm = vcpu->kvm;
lockdep_assert_held(&kvm->arch.tsc_write_lock);
+ if (vcpu->arch.guest_tsc_protected)
+ return;
+
+ if (user_set_tsc)
+ vcpu->kvm->arch.user_set_tsc = true;
+
/*
* We also track th most recent recorded KHZ, write and time to
* allow the matching interval to be extended at each write.
@@ -2717,8 +2731,6 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 *user_value)
}
}
- if (user_value)
- kvm->arch.user_set_tsc = true;
/*
* For a reliable TSC, we can match TSC offsets, and for an unstable
@@ -2738,7 +2750,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 *user_value)
matched = true;
}
- __kvm_synchronize_tsc(vcpu, offset, data, ns, matched);
+ __kvm_synchronize_tsc(vcpu, offset, data, ns, matched, !!user_value);
raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
}
@@ -3116,15 +3128,17 @@ u64 get_kvmclock_ns(struct kvm *kvm)
return data.clock;
}
-static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
+static void kvm_setup_guest_pvclock(struct pvclock_vcpu_time_info *ref_hv_clock,
+ struct kvm_vcpu *vcpu,
struct gfn_to_pfn_cache *gpc,
- unsigned int offset,
- bool force_tsc_unstable)
+ unsigned int offset)
{
- struct kvm_vcpu_arch *vcpu = &v->arch;
struct pvclock_vcpu_time_info *guest_hv_clock;
+ struct pvclock_vcpu_time_info hv_clock;
unsigned long flags;
+ memcpy(&hv_clock, ref_hv_clock, sizeof(hv_clock));
+
read_lock_irqsave(&gpc->lock, flags);
while (!kvm_gpc_check(gpc, offset + sizeof(*guest_hv_clock))) {
read_unlock_irqrestore(&gpc->lock, flags);
@@ -3144,52 +3158,34 @@ static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
* it is consistent.
*/
- guest_hv_clock->version = vcpu->hv_clock.version = (guest_hv_clock->version + 1) | 1;
+ guest_hv_clock->version = hv_clock.version = (guest_hv_clock->version + 1) | 1;
smp_wmb();
/* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
- vcpu->hv_clock.flags |= (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED);
-
- if (vcpu->pvclock_set_guest_stopped_request) {
- vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
- vcpu->pvclock_set_guest_stopped_request = false;
- }
-
- memcpy(guest_hv_clock, &vcpu->hv_clock, sizeof(*guest_hv_clock));
+ hv_clock.flags |= (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED);
- if (force_tsc_unstable)
- guest_hv_clock->flags &= ~PVCLOCK_TSC_STABLE_BIT;
+ memcpy(guest_hv_clock, &hv_clock, sizeof(*guest_hv_clock));
smp_wmb();
- guest_hv_clock->version = ++vcpu->hv_clock.version;
+ guest_hv_clock->version = ++hv_clock.version;
kvm_gpc_mark_dirty_in_slot(gpc);
read_unlock_irqrestore(&gpc->lock, flags);
- trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
+ trace_kvm_pvclock_update(vcpu->vcpu_id, &hv_clock);
}
-static int kvm_guest_time_update(struct kvm_vcpu *v)
+int kvm_guest_time_update(struct kvm_vcpu *v)
{
+ struct pvclock_vcpu_time_info hv_clock = {};
unsigned long flags, tgt_tsc_khz;
unsigned seq;
struct kvm_vcpu_arch *vcpu = &v->arch;
struct kvm_arch *ka = &v->kvm->arch;
s64 kernel_ns;
u64 tsc_timestamp, host_tsc;
- u8 pvclock_flags;
bool use_master_clock;
-#ifdef CONFIG_KVM_XEN
- /*
- * For Xen guests we may need to override PVCLOCK_TSC_STABLE_BIT as unless
- * explicitly told to use TSC as its clocksource Xen will not set this bit.
- * This default behaviour led to bugs in some guest kernels which cause
- * problems if they observe PVCLOCK_TSC_STABLE_BIT in the pvclock flags.
- */
- bool xen_pvclock_tsc_unstable =
- ka->xen_hvm_config.flags & KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE;
-#endif
kernel_ns = 0;
host_tsc = 0;
@@ -3250,35 +3246,57 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL,
- &vcpu->hv_clock.tsc_shift,
- &vcpu->hv_clock.tsc_to_system_mul);
+ &vcpu->pvclock_tsc_shift,
+ &vcpu->pvclock_tsc_mul);
vcpu->hw_tsc_khz = tgt_tsc_khz;
- kvm_xen_update_tsc_info(v);
}
- vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
- vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
+ hv_clock.tsc_shift = vcpu->pvclock_tsc_shift;
+ hv_clock.tsc_to_system_mul = vcpu->pvclock_tsc_mul;
+ hv_clock.tsc_timestamp = tsc_timestamp;
+ hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
vcpu->last_guest_tsc = tsc_timestamp;
/* If the host uses TSC clocksource, then it is stable */
- pvclock_flags = 0;
+ hv_clock.flags = 0;
if (use_master_clock)
- pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
+ hv_clock.flags |= PVCLOCK_TSC_STABLE_BIT;
- vcpu->hv_clock.flags = pvclock_flags;
+ if (vcpu->pv_time.active) {
+ /*
+ * GUEST_STOPPED is only supported by kvmclock, and KVM's
+ * historic behavior is to only process the request if kvmclock
+ * is active/enabled.
+ */
+ if (vcpu->pvclock_set_guest_stopped_request) {
+ hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
+ vcpu->pvclock_set_guest_stopped_request = false;
+ }
+ kvm_setup_guest_pvclock(&hv_clock, v, &vcpu->pv_time, 0);
+
+ hv_clock.flags &= ~PVCLOCK_GUEST_STOPPED;
+ }
+
+ kvm_hv_setup_tsc_page(v->kvm, &hv_clock);
- if (vcpu->pv_time.active)
- kvm_setup_guest_pvclock(v, &vcpu->pv_time, 0, false);
#ifdef CONFIG_KVM_XEN
+ /*
+ * For Xen guests we may need to override PVCLOCK_TSC_STABLE_BIT as unless
+ * explicitly told to use TSC as its clocksource Xen will not set this bit.
+ * This default behaviour led to bugs in some guest kernels which cause
+ * problems if they observe PVCLOCK_TSC_STABLE_BIT in the pvclock flags.
+ *
+ * Note! Clear TSC_STABLE only for Xen clocks, i.e. the order matters!
+ */
+ if (ka->xen.hvm_config.flags & KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE)
+ hv_clock.flags &= ~PVCLOCK_TSC_STABLE_BIT;
+
if (vcpu->xen.vcpu_info_cache.active)
- kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_info_cache,
- offsetof(struct compat_vcpu_info, time),
- xen_pvclock_tsc_unstable);
+ kvm_setup_guest_pvclock(&hv_clock, v, &vcpu->xen.vcpu_info_cache,
+ offsetof(struct compat_vcpu_info, time));
if (vcpu->xen.vcpu_time_info_cache.active)
- kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_time_info_cache, 0,
- xen_pvclock_tsc_unstable);
+ kvm_setup_guest_pvclock(&hv_clock, v, &vcpu->xen.vcpu_time_info_cache, 0);
#endif
- kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
return 0;
}
@@ -3544,7 +3562,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
sizeof(u64)))
return 1;
- vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
+ vcpu->arch.apf.send_always = (data & KVM_ASYNC_PF_SEND_ALWAYS);
vcpu->arch.apf.delivery_as_pf_vmexit = data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
kvm_async_pf_wakeup_all(vcpu);
@@ -3733,7 +3751,13 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
u32 msr = msr_info->index;
u64 data = msr_info->data;
- if (msr && msr == vcpu->kvm->arch.xen_hvm_config.msr)
+ /*
+ * Do not allow host-initiated writes to trigger the Xen hypercall
+ * page setup; it could incur locking paths which are not expected
+ * if userspace sets the MSR in an unusual location.
+ */
+ if (kvm_xen_is_hypercall_page_msr(vcpu->kvm, msr) &&
+ !msr_info->host_initiated)
return kvm_xen_write_hypercall_page(vcpu, data);
switch (msr) {
@@ -3889,7 +3913,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!guest_cpu_cap_has(vcpu, X86_FEATURE_XMM3))
return 1;
vcpu->arch.ia32_misc_enable_msr = data;
- kvm_update_cpuid_runtime(vcpu);
+ vcpu->arch.cpuid_dynamic_bits_dirty = true;
} else {
vcpu->arch.ia32_misc_enable_msr = data;
}
@@ -3906,7 +3930,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_TSC:
if (msr_info->host_initiated) {
kvm_synchronize_tsc(vcpu, &data);
- } else {
+ } else if (!vcpu->arch.guest_tsc_protected) {
u64 adj = kvm_compute_l1_tsc_offset(vcpu, data) - vcpu->arch.l1_tsc_offset;
adjust_tsc_offset_guest(vcpu, adj);
vcpu->arch.ia32_tsc_adjust_msr += adj;
@@ -3924,7 +3948,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (data & ~kvm_caps.supported_xss)
return 1;
vcpu->arch.ia32_xss = data;
- kvm_update_cpuid_runtime(vcpu);
+ vcpu->arch.cpuid_dynamic_bits_dirty = true;
break;
case MSR_SMI_COUNT:
if (!msr_info->host_initiated)
@@ -4573,6 +4597,11 @@ static bool kvm_is_vm_type_supported(unsigned long type)
return type < 32 && (kvm_caps.supported_vm_types & BIT(type));
}
+static inline u32 kvm_sync_valid_fields(struct kvm *kvm)
+{
+ return kvm && kvm->arch.has_protected_state ? 0 : KVM_SYNC_X86_VALID_FIELDS;
+}
+
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
int r = 0;
@@ -4681,7 +4710,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
break;
#endif
case KVM_CAP_SYNC_REGS:
- r = KVM_SYNC_X86_VALID_FIELDS;
+ r = kvm_sync_valid_fields(kvm);
break;
case KVM_CAP_ADJUST_CLOCK:
r = KVM_CLOCK_VALID_FLAGS;
@@ -4986,7 +5015,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
u64 offset = kvm_compute_l1_tsc_offset(vcpu,
vcpu->arch.last_guest_tsc);
kvm_vcpu_write_tsc_offset(vcpu, offset);
- vcpu->arch.tsc_catchup = 1;
+ if (!vcpu->arch.guest_tsc_protected)
+ vcpu->arch.tsc_catchup = 1;
}
if (kvm_lapic_hv_timer_in_use(vcpu))
@@ -5725,8 +5755,7 @@ static int kvm_arch_tsc_set_attr(struct kvm_vcpu *vcpu,
tsc = kvm_scale_tsc(rdtsc(), vcpu->arch.l1_tsc_scaling_ratio) + offset;
ns = get_kvmclock_base_ns();
- kvm->arch.user_set_tsc = true;
- __kvm_synchronize_tsc(vcpu, offset, tsc, ns, matched);
+ __kvm_synchronize_tsc(vcpu, offset, tsc, ns, matched, true);
raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
r = 0;
@@ -6905,23 +6934,15 @@ static int kvm_arch_suspend_notifier(struct kvm *kvm)
{
struct kvm_vcpu *vcpu;
unsigned long i;
- int ret = 0;
-
- mutex_lock(&kvm->lock);
- kvm_for_each_vcpu(i, vcpu, kvm) {
- if (!vcpu->arch.pv_time.active)
- continue;
- ret = kvm_set_guest_paused(vcpu);
- if (ret) {
- kvm_err("Failed to pause guest VCPU%d: %d\n",
- vcpu->vcpu_id, ret);
- break;
- }
- }
- mutex_unlock(&kvm->lock);
+ /*
+ * Ignore the return, marking the guest paused only "fails" if the vCPU
+ * isn't using kvmclock; continuing on is correct and desirable.
+ */
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ (void)kvm_set_guest_paused(vcpu);
- return ret ? NOTIFY_BAD : NOTIFY_DONE;
+ return NOTIFY_DONE;
}
int kvm_arch_pm_notifier(struct kvm *kvm, unsigned long state)
@@ -11220,9 +11241,7 @@ static inline int vcpu_block(struct kvm_vcpu *vcpu)
switch(vcpu->arch.mp_state) {
case KVM_MP_STATE_HALTED:
case KVM_MP_STATE_AP_RESET_HOLD:
- vcpu->arch.pv.pv_unhalted = false;
- vcpu->arch.mp_state =
- KVM_MP_STATE_RUNNABLE;
+ kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
fallthrough;
case KVM_MP_STATE_RUNNABLE:
vcpu->arch.apf.halted = false;
@@ -11299,9 +11318,8 @@ static int __kvm_emulate_halt(struct kvm_vcpu *vcpu, int state, int reason)
++vcpu->stat.halt_exits;
if (lapic_in_kernel(vcpu)) {
if (kvm_vcpu_has_events(vcpu))
- vcpu->arch.pv.pv_unhalted = false;
- else
- vcpu->arch.mp_state = state;
+ state = KVM_MP_STATE_RUNNABLE;
+ kvm_set_mp_state(vcpu, state);
return 1;
} else {
vcpu->run->exit_reason = reason;
@@ -11474,6 +11492,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
struct kvm_queued_exception *ex = &vcpu->arch.exception;
struct kvm_run *kvm_run = vcpu->run;
+ u32 sync_valid_fields;
int r;
r = kvm_mmu_post_init_vm(vcpu->kvm);
@@ -11519,8 +11538,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
goto out;
}
- if ((kvm_run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) ||
- (kvm_run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS)) {
+ sync_valid_fields = kvm_sync_valid_fields(vcpu->kvm);
+ if ((kvm_run->kvm_valid_regs & ~sync_valid_fields) ||
+ (kvm_run->kvm_dirty_regs & ~sync_valid_fields)) {
r = -EINVAL;
goto out;
}
@@ -11578,7 +11598,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
out:
kvm_put_guest_fpu(vcpu);
- if (kvm_run->kvm_valid_regs)
+ if (kvm_run->kvm_valid_regs && likely(!vcpu->arch.guest_state_protected))
store_regs(vcpu);
post_kvm_run_save(vcpu);
kvm_vcpu_srcu_read_unlock(vcpu);
@@ -11766,6 +11786,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
if (kvm_mpx_supported())
kvm_load_guest_fpu(vcpu);
+ kvm_vcpu_srcu_read_lock(vcpu);
+
r = kvm_apic_accept_events(vcpu);
if (r < 0)
goto out;
@@ -11779,6 +11801,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
mp_state->mp_state = vcpu->arch.mp_state;
out:
+ kvm_vcpu_srcu_read_unlock(vcpu);
+
if (kvm_mpx_supported())
kvm_put_guest_fpu(vcpu);
vcpu_put(vcpu);
@@ -11821,10 +11845,10 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
goto out;
if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
- vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
+ kvm_set_mp_state(vcpu, KVM_MP_STATE_INIT_RECEIVED);
set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events);
} else
- vcpu->arch.mp_state = mp_state->mp_state;
+ kvm_set_mp_state(vcpu, mp_state->mp_state);
kvm_make_request(KVM_REQ_EVENT, vcpu);
ret = 0;
@@ -11951,7 +11975,7 @@ static int __set_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs,
if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
!is_protmode(vcpu))
- vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+ kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
return 0;
}
@@ -12254,9 +12278,9 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
kvm_gpc_init(&vcpu->arch.pv_time, vcpu->kvm);
if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
- vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+ kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
else
- vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
+ kvm_set_mp_state(vcpu, KVM_MP_STATE_UNINITIALIZED);
r = kvm_mmu_create(vcpu);
if (r < 0)
@@ -12363,6 +12387,9 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
int idx;
+ kvm_clear_async_pf_completion_queue(vcpu);
+ kvm_mmu_unload(vcpu);
+
kvmclock_reset(vcpu);
kvm_x86_call(vcpu_free)(vcpu);
@@ -12756,31 +12783,6 @@ out:
return ret;
}
-static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
-{
- vcpu_load(vcpu);
- kvm_mmu_unload(vcpu);
- vcpu_put(vcpu);
-}
-
-static void kvm_unload_vcpu_mmus(struct kvm *kvm)
-{
- unsigned long i;
- struct kvm_vcpu *vcpu;
-
- kvm_for_each_vcpu(i, vcpu, kvm) {
- kvm_clear_async_pf_completion_queue(vcpu);
- kvm_unload_vcpu_mmu(vcpu);
- }
-}
-
-void kvm_arch_sync_events(struct kvm *kvm)
-{
- cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work);
- cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work);
- kvm_free_pit(kvm);
-}
-
/**
* __x86_set_memory_region: Setup KVM internal memory slot
*
@@ -12859,6 +12861,17 @@ EXPORT_SYMBOL_GPL(__x86_set_memory_region);
void kvm_arch_pre_destroy_vm(struct kvm *kvm)
{
+ /*
+ * Stop all background workers and kthreads before destroying vCPUs, as
+ * iterating over vCPUs in a different task while vCPUs are being freed
+ * is unsafe, i.e. will lead to use-after-free. The PIT also needs to
+ * be stopped before IRQ routing is freed.
+ */
+ cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work);
+ cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work);
+
+ kvm_free_pit(kvm);
+
kvm_mmu_pre_destroy_vm(kvm);
}
@@ -12878,9 +12891,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
__x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
mutex_unlock(&kvm->slots_lock);
}
- kvm_unload_vcpu_mmus(kvm);
kvm_destroy_vcpus(kvm);
- kvm_x86_call(vm_destroy)(kvm);
kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1));
kvm_pic_destroy(kvm);
kvm_ioapic_destroy(kvm);
@@ -12890,6 +12901,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_page_track_cleanup(kvm);
kvm_xen_destroy_vm(kvm);
kvm_hv_destroy_vm(kvm);
+ kvm_x86_call(vm_destroy)(kvm);
}
static void memslot_rmap_free(struct kvm_memory_slot *slot)
@@ -13383,8 +13395,8 @@ static bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu)
if (!kvm_pv_async_pf_enabled(vcpu))
return false;
- if (vcpu->arch.apf.send_user_only &&
- kvm_x86_call(get_cpl)(vcpu) == 0)
+ if (!vcpu->arch.apf.send_always &&
+ (vcpu->arch.guest_state_protected || !kvm_x86_call(get_cpl)(vcpu)))
return false;
if (is_guest_mode(vcpu)) {
@@ -13474,7 +13486,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
}
vcpu->arch.apf.halted = false;
- vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+ kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
}
void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 91e50a513100..9dc32a409076 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -121,6 +121,13 @@ static inline bool kvm_vcpu_has_run(struct kvm_vcpu *vcpu)
return vcpu->arch.last_vmentry_cpu != -1;
}
+static inline void kvm_set_mp_state(struct kvm_vcpu *vcpu, int mp_state)
+{
+ vcpu->arch.mp_state = mp_state;
+ if (mp_state == KVM_MP_STATE_RUNNABLE)
+ vcpu->arch.pv.pv_unhalted = false;
+}
+
static inline bool kvm_is_exception_pending(struct kvm_vcpu *vcpu)
{
return vcpu->arch.exception.pending ||
@@ -362,6 +369,7 @@ void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
u64 get_kvmclock_ns(struct kvm *kvm);
uint64_t kvm_get_wall_clock_epoch(struct kvm *kvm);
bool kvm_get_monotonic_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp);
+int kvm_guest_time_update(struct kvm_vcpu *v);
int kvm_read_guest_virt(struct kvm_vcpu *vcpu,
gva_t addr, void *val, unsigned int bytes,
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index a909b817b9c0..7449be30d701 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -150,11 +150,46 @@ static enum hrtimer_restart xen_timer_callback(struct hrtimer *timer)
return HRTIMER_NORESTART;
}
+static int xen_get_guest_pvclock(struct kvm_vcpu *vcpu,
+ struct pvclock_vcpu_time_info *hv_clock,
+ struct gfn_to_pfn_cache *gpc,
+ unsigned int offset)
+{
+ unsigned long flags;
+ int r;
+
+ read_lock_irqsave(&gpc->lock, flags);
+ while (!kvm_gpc_check(gpc, offset + sizeof(*hv_clock))) {
+ read_unlock_irqrestore(&gpc->lock, flags);
+
+ r = kvm_gpc_refresh(gpc, offset + sizeof(*hv_clock));
+ if (r)
+ return r;
+
+ read_lock_irqsave(&gpc->lock, flags);
+ }
+
+ memcpy(hv_clock, gpc->khva + offset, sizeof(*hv_clock));
+ read_unlock_irqrestore(&gpc->lock, flags);
+
+ /*
+ * Sanity check TSC shift+multiplier to verify the guest's view of time
+ * is more or less consistent.
+ */
+ if (hv_clock->tsc_shift != vcpu->arch.pvclock_tsc_shift ||
+ hv_clock->tsc_to_system_mul != vcpu->arch.pvclock_tsc_mul)
+ return -EINVAL;
+
+ return 0;
+}
+
static void kvm_xen_start_timer(struct kvm_vcpu *vcpu, u64 guest_abs,
bool linux_wa)
{
+ struct kvm_vcpu_xen *xen = &vcpu->arch.xen;
int64_t kernel_now, delta;
uint64_t guest_now;
+ int r = -EOPNOTSUPP;
/*
* The guest provides the requested timeout in absolute nanoseconds
@@ -173,10 +208,29 @@ static void kvm_xen_start_timer(struct kvm_vcpu *vcpu, u64 guest_abs,
* the absolute CLOCK_MONOTONIC time at which the timer should
* fire.
*/
- if (vcpu->arch.hv_clock.version && vcpu->kvm->arch.use_master_clock &&
- static_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+ do {
+ struct pvclock_vcpu_time_info hv_clock;
uint64_t host_tsc, guest_tsc;
+ if (!static_cpu_has(X86_FEATURE_CONSTANT_TSC) ||
+ !vcpu->kvm->arch.use_master_clock)
+ break;
+
+ /*
+ * If both Xen PV clocks are active, arbitrarily try to use the
+ * compat clock first, but also try to use the non-compat clock
+ * if the compat clock is unusable. The two PV clocks hold the
+ * same information, but it's possible one (or both) is stale
+ * and/or currently unreachable.
+ */
+ if (xen->vcpu_info_cache.active)
+ r = xen_get_guest_pvclock(vcpu, &hv_clock, &xen->vcpu_info_cache,
+ offsetof(struct compat_vcpu_info, time));
+ if (r && xen->vcpu_time_info_cache.active)
+ r = xen_get_guest_pvclock(vcpu, &hv_clock, &xen->vcpu_time_info_cache, 0);
+ if (r)
+ break;
+
if (!IS_ENABLED(CONFIG_64BIT) ||
!kvm_get_monotonic_and_clockread(&kernel_now, &host_tsc)) {
/*
@@ -197,9 +251,10 @@ static void kvm_xen_start_timer(struct kvm_vcpu *vcpu, u64 guest_abs,
/* Calculate the guest kvmclock as the guest would do it. */
guest_tsc = kvm_read_l1_tsc(vcpu, host_tsc);
- guest_now = __pvclock_read_cycles(&vcpu->arch.hv_clock,
- guest_tsc);
- } else {
+ guest_now = __pvclock_read_cycles(&hv_clock, guest_tsc);
+ } while (0);
+
+ if (r) {
/*
* Without CONSTANT_TSC, get_kvmclock_ns() is the only option.
*
@@ -1280,10 +1335,10 @@ int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data)
* Note, truncation is a non-issue as 'lm' is guaranteed to be
* false for a 32-bit kernel, i.e. when hva_t is only 4 bytes.
*/
- hva_t blob_addr = lm ? kvm->arch.xen_hvm_config.blob_addr_64
- : kvm->arch.xen_hvm_config.blob_addr_32;
- u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
- : kvm->arch.xen_hvm_config.blob_size_32;
+ hva_t blob_addr = lm ? kvm->arch.xen.hvm_config.blob_addr_64
+ : kvm->arch.xen.hvm_config.blob_addr_32;
+ u8 blob_size = lm ? kvm->arch.xen.hvm_config.blob_size_64
+ : kvm->arch.xen.hvm_config.blob_size_32;
u8 *page;
int ret;
@@ -1324,15 +1379,24 @@ int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc)
xhc->blob_size_32 || xhc->blob_size_64))
return -EINVAL;
+ /*
+ * Restrict the MSR to the range that is unofficially reserved for
+ * synthetic, virtualization-defined MSRs, e.g. to prevent confusing
+ * KVM by colliding with a real MSR that requires special handling.
+ */
+ if (xhc->msr &&
+ (xhc->msr < KVM_XEN_MSR_MIN_INDEX || xhc->msr > KVM_XEN_MSR_MAX_INDEX))
+ return -EINVAL;
+
mutex_lock(&kvm->arch.xen.xen_lock);
- if (xhc->msr && !kvm->arch.xen_hvm_config.msr)
+ if (xhc->msr && !kvm->arch.xen.hvm_config.msr)
static_branch_inc(&kvm_xen_enabled.key);
- else if (!xhc->msr && kvm->arch.xen_hvm_config.msr)
+ else if (!xhc->msr && kvm->arch.xen.hvm_config.msr)
static_branch_slow_dec_deferred(&kvm_xen_enabled);
- old_flags = kvm->arch.xen_hvm_config.flags;
- memcpy(&kvm->arch.xen_hvm_config, xhc, sizeof(*xhc));
+ old_flags = kvm->arch.xen.hvm_config.flags;
+ memcpy(&kvm->arch.xen.hvm_config, xhc, sizeof(*xhc));
mutex_unlock(&kvm->arch.xen.xen_lock);
@@ -1413,7 +1477,7 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode,
int i;
if (!lapic_in_kernel(vcpu) ||
- !(vcpu->kvm->arch.xen_hvm_config.flags & KVM_XEN_HVM_CONFIG_EVTCHN_SEND))
+ !(vcpu->kvm->arch.xen.hvm_config.flags & KVM_XEN_HVM_CONFIG_EVTCHN_SEND))
return false;
if (IS_ENABLED(CONFIG_64BIT) && !longmode) {
@@ -1480,7 +1544,7 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode,
set_bit(vcpu->vcpu_idx, vcpu->kvm->arch.xen.poll_mask);
if (!wait_pending_event(vcpu, sched_poll.nr_ports, ports)) {
- vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
+ kvm_set_mp_state(vcpu, KVM_MP_STATE_HALTED);
if (sched_poll.timeout)
mod_timer(&vcpu->arch.xen.poll_timer,
@@ -1491,7 +1555,7 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode,
if (sched_poll.timeout)
del_timer(&vcpu->arch.xen.poll_timer);
- vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+ kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
}
vcpu->arch.xen.poll_evtchn = 0;
@@ -2247,29 +2311,6 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
del_timer_sync(&vcpu->arch.xen.poll_timer);
}
-void kvm_xen_update_tsc_info(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpuid_entry2 *entry;
- u32 function;
-
- if (!vcpu->arch.xen.cpuid.base)
- return;
-
- function = vcpu->arch.xen.cpuid.base | XEN_CPUID_LEAF(3);
- if (function > vcpu->arch.xen.cpuid.limit)
- return;
-
- entry = kvm_find_cpuid_entry_index(vcpu, function, 1);
- if (entry) {
- entry->ecx = vcpu->arch.hv_clock.tsc_to_system_mul;
- entry->edx = vcpu->arch.hv_clock.tsc_shift;
- }
-
- entry = kvm_find_cpuid_entry_index(vcpu, function, 2);
- if (entry)
- entry->eax = vcpu->arch.hw_tsc_khz;
-}
-
void kvm_xen_init_vm(struct kvm *kvm)
{
mutex_init(&kvm->arch.xen.xen_lock);
@@ -2291,6 +2332,6 @@ void kvm_xen_destroy_vm(struct kvm *kvm)
}
idr_destroy(&kvm->arch.xen.evtchn_ports);
- if (kvm->arch.xen_hvm_config.msr)
+ if (kvm->arch.xen.hvm_config.msr)
static_branch_slow_dec_deferred(&kvm_xen_enabled);
}
diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h
index f5841d9000ae..59e6128a7bd3 100644
--- a/arch/x86/kvm/xen.h
+++ b/arch/x86/kvm/xen.h
@@ -9,6 +9,7 @@
#ifndef __ARCH_X86_KVM_XEN_H__
#define __ARCH_X86_KVM_XEN_H__
+#include <asm/xen/cpuid.h>
#include <asm/xen/hypervisor.h>
#ifdef CONFIG_KVM_XEN
@@ -35,7 +36,6 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe,
int kvm_xen_setup_evtchn(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue);
-void kvm_xen_update_tsc_info(struct kvm_vcpu *vcpu);
static inline void kvm_xen_sw_enable_lapic(struct kvm_vcpu *vcpu)
{
@@ -50,16 +50,32 @@ static inline void kvm_xen_sw_enable_lapic(struct kvm_vcpu *vcpu)
kvm_xen_inject_vcpu_vector(vcpu);
}
+static inline bool kvm_xen_is_tsc_leaf(struct kvm_vcpu *vcpu, u32 function)
+{
+ return static_branch_unlikely(&kvm_xen_enabled.key) &&
+ vcpu->arch.xen.cpuid.base &&
+ function <= vcpu->arch.xen.cpuid.limit &&
+ function == (vcpu->arch.xen.cpuid.base | XEN_CPUID_LEAF(3));
+}
+
static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
{
return static_branch_unlikely(&kvm_xen_enabled.key) &&
- kvm->arch.xen_hvm_config.msr;
+ kvm->arch.xen.hvm_config.msr;
+}
+
+static inline bool kvm_xen_is_hypercall_page_msr(struct kvm *kvm, u32 msr)
+{
+ if (!static_branch_unlikely(&kvm_xen_enabled.key))
+ return false;
+
+ return msr && msr == kvm->arch.xen.hvm_config.msr;
}
static inline bool kvm_xen_hypercall_enabled(struct kvm *kvm)
{
return static_branch_unlikely(&kvm_xen_enabled.key) &&
- (kvm->arch.xen_hvm_config.flags &
+ (kvm->arch.xen.hvm_config.flags &
KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL);
}
@@ -124,6 +140,11 @@ static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
return false;
}
+static inline bool kvm_xen_is_hypercall_page_msr(struct kvm *kvm, u32 msr)
+{
+ return false;
+}
+
static inline bool kvm_xen_hypercall_enabled(struct kvm *kvm)
{
return false;
@@ -157,8 +178,9 @@ static inline bool kvm_xen_timer_enabled(struct kvm_vcpu *vcpu)
return false;
}
-static inline void kvm_xen_update_tsc_info(struct kvm_vcpu *vcpu)
+static inline bool kvm_xen_is_tsc_leaf(struct kvm_vcpu *vcpu, u32 function)
{
+ return false;
}
#endif
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index d94ef37480bd..fdc7a0b2af10 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -1549,8 +1549,8 @@ static int null_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
cmd = blk_mq_rq_to_pdu(req);
cmd->error = null_process_cmd(cmd, req_op(req), blk_rq_pos(req),
blk_rq_sectors(req));
- if (!blk_mq_add_to_batch(req, iob, (__force int) cmd->error,
- blk_mq_end_request_batch))
+ if (!blk_mq_add_to_batch(req, iob, cmd->error != BLK_STS_OK,
+ blk_mq_end_request_batch))
blk_mq_end_request(req, cmd->error);
nr++;
}
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 6a61ec35f426..91cde76a4b3e 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -1207,11 +1207,12 @@ static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
while ((vbr = virtqueue_get_buf(vq->vq, &len)) != NULL) {
struct request *req = blk_mq_rq_from_pdu(vbr);
+ u8 status = virtblk_vbr_status(vbr);
found++;
if (!blk_mq_complete_request_remote(req) &&
- !blk_mq_add_to_batch(req, iob, virtblk_vbr_status(vbr),
- virtblk_complete_batch))
+ !blk_mq_add_to_batch(req, iob, status != VIRTIO_BLK_S_OK,
+ virtblk_complete_batch))
virtblk_request_done(req);
}
diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig
index 4ab32abf0f48..7771edf54fb3 100644
--- a/drivers/bluetooth/Kconfig
+++ b/drivers/bluetooth/Kconfig
@@ -56,6 +56,18 @@ config BT_HCIBTUSB_POLL_SYNC
Say Y here to enable USB poll_sync for Bluetooth USB devices by
default.
+config BT_HCIBTUSB_AUTO_ISOC_ALT
+ bool "Automatically adjust alternate setting for Isoc endpoints"
+ depends on BT_HCIBTUSB
+ default y if CHROME_PLATFORMS
+ help
+ Say Y here to automatically adjusting the alternate setting for
+ HCI_USER_CHANNEL whenever a SCO link is established.
+
+ When enabled, btusb intercepts the HCI_EV_SYNC_CONN_COMPLETE packets
+ and configures isoc endpoint alternate setting automatically when
+ HCI_USER_CHANNEL is in use.
+
config BT_HCIBTUSB_BCM
bool "Broadcom protocol support"
depends on BT_HCIBTUSB
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 2a8d91963c63..a0fc465458b2 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -34,6 +34,7 @@ static bool force_scofix;
static bool enable_autosuspend = IS_ENABLED(CONFIG_BT_HCIBTUSB_AUTOSUSPEND);
static bool enable_poll_sync = IS_ENABLED(CONFIG_BT_HCIBTUSB_POLL_SYNC);
static bool reset = true;
+static bool auto_isoc_alt = IS_ENABLED(CONFIG_BT_HCIBTUSB_AUTO_ISOC_ALT);
static struct usb_driver btusb_driver;
@@ -1085,6 +1086,42 @@ static inline void btusb_free_frags(struct btusb_data *data)
spin_unlock_irqrestore(&data->rxlock, flags);
}
+static void btusb_sco_connected(struct btusb_data *data, struct sk_buff *skb)
+{
+ struct hci_event_hdr *hdr = (void *) skb->data;
+ struct hci_ev_sync_conn_complete *ev =
+ (void *) skb->data + sizeof(*hdr);
+ struct hci_dev *hdev = data->hdev;
+ unsigned int notify_air_mode;
+
+ if (hci_skb_pkt_type(skb) != HCI_EVENT_PKT)
+ return;
+
+ if (skb->len < sizeof(*hdr) || hdr->evt != HCI_EV_SYNC_CONN_COMPLETE)
+ return;
+
+ if (skb->len != sizeof(*hdr) + sizeof(*ev) || ev->status)
+ return;
+
+ switch (ev->air_mode) {
+ case BT_CODEC_CVSD:
+ notify_air_mode = HCI_NOTIFY_ENABLE_SCO_CVSD;
+ break;
+
+ case BT_CODEC_TRANSPARENT:
+ notify_air_mode = HCI_NOTIFY_ENABLE_SCO_TRANSP;
+ break;
+
+ default:
+ return;
+ }
+
+ bt_dev_info(hdev, "enabling SCO with air mode %u", ev->air_mode);
+ data->sco_num = 1;
+ data->air_mode = notify_air_mode;
+ schedule_work(&data->work);
+}
+
static int btusb_recv_event(struct btusb_data *data, struct sk_buff *skb)
{
if (data->intr_interval) {
@@ -1092,6 +1129,10 @@ static int btusb_recv_event(struct btusb_data *data, struct sk_buff *skb)
schedule_delayed_work(&data->rx_work, 0);
}
+ /* Configure altsetting for HCI_USER_CHANNEL on SCO connected */
+ if (auto_isoc_alt && hci_dev_test_flag(data->hdev, HCI_USER_CHANNEL))
+ btusb_sco_connected(data, skb);
+
return data->recv_event(data->hdev, skb);
}
diff --git a/drivers/clk/qcom/dispcc-sm8750.c b/drivers/clk/qcom/dispcc-sm8750.c
index 0358dff91da5..e9bca179998b 100644
--- a/drivers/clk/qcom/dispcc-sm8750.c
+++ b/drivers/clk/qcom/dispcc-sm8750.c
@@ -827,7 +827,6 @@ static struct clk_regmap_div disp_cc_mdss_byte0_div_clk_src = {
&disp_cc_mdss_byte0_clk_src.clkr.hw,
},
.num_parents = 1,
- .flags = CLK_SET_RATE_PARENT,
.ops = &clk_regmap_div_ops,
},
};
@@ -842,7 +841,6 @@ static struct clk_regmap_div disp_cc_mdss_byte1_div_clk_src = {
&disp_cc_mdss_byte1_clk_src.clkr.hw,
},
.num_parents = 1,
- .flags = CLK_SET_RATE_PARENT,
.ops = &clk_regmap_div_ops,
},
};
diff --git a/drivers/clk/samsung/clk-gs101.c b/drivers/clk/samsung/clk-gs101.c
index 86b39edba122..08b867ae3ed9 100644
--- a/drivers/clk/samsung/clk-gs101.c
+++ b/drivers/clk/samsung/clk-gs101.c
@@ -382,17 +382,9 @@ static const unsigned long cmu_top_clk_regs[] __initconst = {
EARLY_WAKEUP_DPU_DEST,
EARLY_WAKEUP_CSIS_DEST,
EARLY_WAKEUP_SW_TRIG_APM,
- EARLY_WAKEUP_SW_TRIG_APM_SET,
- EARLY_WAKEUP_SW_TRIG_APM_CLEAR,
EARLY_WAKEUP_SW_TRIG_CLUSTER0,
- EARLY_WAKEUP_SW_TRIG_CLUSTER0_SET,
- EARLY_WAKEUP_SW_TRIG_CLUSTER0_CLEAR,
EARLY_WAKEUP_SW_TRIG_DPU,
- EARLY_WAKEUP_SW_TRIG_DPU_SET,
- EARLY_WAKEUP_SW_TRIG_DPU_CLEAR,
EARLY_WAKEUP_SW_TRIG_CSIS,
- EARLY_WAKEUP_SW_TRIG_CSIS_SET,
- EARLY_WAKEUP_SW_TRIG_CSIS_CLEAR,
CLK_CON_MUX_MUX_CLKCMU_BO_BUS,
CLK_CON_MUX_MUX_CLKCMU_BUS0_BUS,
CLK_CON_MUX_MUX_CLKCMU_BUS1_BUS,
diff --git a/drivers/clk/samsung/clk-pll.c b/drivers/clk/samsung/clk-pll.c
index 2e94bba6c396..023a25af73c4 100644
--- a/drivers/clk/samsung/clk-pll.c
+++ b/drivers/clk/samsung/clk-pll.c
@@ -206,6 +206,7 @@ static const struct clk_ops samsung_pll3000_clk_ops = {
*/
/* Maximum lock time can be 270 * PDIV cycles */
#define PLL35XX_LOCK_FACTOR (270)
+#define PLL142XX_LOCK_FACTOR (150)
#define PLL35XX_MDIV_MASK (0x3FF)
#define PLL35XX_PDIV_MASK (0x3F)
@@ -272,7 +273,11 @@ static int samsung_pll35xx_set_rate(struct clk_hw *hw, unsigned long drate,
}
/* Set PLL lock time. */
- writel_relaxed(rate->pdiv * PLL35XX_LOCK_FACTOR,
+ if (pll->type == pll_142xx)
+ writel_relaxed(rate->pdiv * PLL142XX_LOCK_FACTOR,
+ pll->lock_reg);
+ else
+ writel_relaxed(rate->pdiv * PLL35XX_LOCK_FACTOR,
pll->lock_reg);
/* Change PLL PMS values */
diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c
index 40f76a90fd7d..107d75558b5a 100644
--- a/drivers/gpio/gpiolib-cdev.c
+++ b/drivers/gpio/gpiolib-cdev.c
@@ -2729,8 +2729,9 @@ static int gpio_chrdev_open(struct inode *inode, struct file *file)
cdev->gdev = gpio_device_get(gdev);
cdev->lineinfo_changed_nb.notifier_call = lineinfo_changed_notify;
- ret = atomic_notifier_chain_register(&gdev->line_state_notifier,
- &cdev->lineinfo_changed_nb);
+ scoped_guard(write_lock_irqsave, &gdev->line_state_lock)
+ ret = raw_notifier_chain_register(&gdev->line_state_notifier,
+ &cdev->lineinfo_changed_nb);
if (ret)
goto out_free_bitmap;
@@ -2754,8 +2755,9 @@ out_unregister_device_notifier:
blocking_notifier_chain_unregister(&gdev->device_notifier,
&cdev->device_unregistered_nb);
out_unregister_line_notifier:
- atomic_notifier_chain_unregister(&gdev->line_state_notifier,
- &cdev->lineinfo_changed_nb);
+ scoped_guard(write_lock_irqsave, &gdev->line_state_lock)
+ raw_notifier_chain_unregister(&gdev->line_state_notifier,
+ &cdev->lineinfo_changed_nb);
out_free_bitmap:
gpio_device_put(gdev);
bitmap_free(cdev->watched_lines);
@@ -2779,8 +2781,9 @@ static int gpio_chrdev_release(struct inode *inode, struct file *file)
blocking_notifier_chain_unregister(&gdev->device_notifier,
&cdev->device_unregistered_nb);
- atomic_notifier_chain_unregister(&gdev->line_state_notifier,
- &cdev->lineinfo_changed_nb);
+ scoped_guard(write_lock_irqsave, &gdev->line_state_lock)
+ raw_notifier_chain_unregister(&gdev->line_state_notifier,
+ &cdev->lineinfo_changed_nb);
bitmap_free(cdev->watched_lines);
gpio_device_put(gdev);
kfree(cdev);
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 8741600af7ef..0c00ed2ab431 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1025,7 +1025,8 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
}
}
- ATOMIC_INIT_NOTIFIER_HEAD(&gdev->line_state_notifier);
+ rwlock_init(&gdev->line_state_lock);
+ RAW_INIT_NOTIFIER_HEAD(&gdev->line_state_notifier);
BLOCKING_INIT_NOTIFIER_HEAD(&gdev->device_notifier);
ret = init_srcu_struct(&gdev->srcu);
@@ -1056,24 +1057,19 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
desc->gdev = gdev;
- if (gc->get_direction && gpiochip_line_is_valid(gc, desc_index)) {
- ret = gc->get_direction(gc, desc_index);
- if (ret < 0)
- /*
- * FIXME: Bail-out here once all GPIO drivers
- * are updated to not return errors in
- * situations that can be considered normal
- * operation.
- */
- dev_warn(&gdev->dev,
- "%s: get_direction failed: %d\n",
- __func__, ret);
-
- assign_bit(FLAG_IS_OUT, &desc->flags, !ret);
- } else {
+ /*
+ * We would typically want to check the return value of
+ * get_direction() here but we must not check the return value
+ * and bail-out as pin controllers can have pins configured to
+ * alternate functions and return -EINVAL. Also: there's no
+ * need to take the SRCU lock here.
+ */
+ if (gc->get_direction && gpiochip_line_is_valid(gc, desc_index))
+ assign_bit(FLAG_IS_OUT, &desc->flags,
+ !gc->get_direction(gc, desc_index));
+ else
assign_bit(FLAG_IS_OUT,
&desc->flags, !gc->direction_input);
- }
}
ret = of_gpiochip_add(gc);
@@ -4193,8 +4189,9 @@ EXPORT_SYMBOL_GPL(gpiod_set_array_value_cansleep);
void gpiod_line_state_notify(struct gpio_desc *desc, unsigned long action)
{
- atomic_notifier_call_chain(&desc->gdev->line_state_notifier,
- action, desc);
+ guard(read_lock_irqsave)(&desc->gdev->line_state_lock);
+
+ raw_notifier_call_chain(&desc->gdev->line_state_notifier, action, desc);
}
/**
diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h
index 147156ec502b..c129a03e2040 100644
--- a/drivers/gpio/gpiolib.h
+++ b/drivers/gpio/gpiolib.h
@@ -16,6 +16,7 @@
#include <linux/gpio/driver.h>
#include <linux/module.h>
#include <linux/notifier.h>
+#include <linux/spinlock.h>
#include <linux/srcu.h>
#include <linux/workqueue.h>
@@ -45,6 +46,7 @@
* @list: links gpio_device:s together for traversal
* @line_state_notifier: used to notify subscribers about lines being
* requested, released or reconfigured
+ * @line_state_lock: RW-spinlock protecting the line state notifier
* @line_state_wq: used to emit line state events from a separate thread in
* process context
* @device_notifier: used to notify character device wait queues about the GPIO
@@ -72,7 +74,8 @@ struct gpio_device {
const char *label;
void *data;
struct list_head list;
- struct atomic_notifier_head line_state_notifier;
+ struct raw_notifier_head line_state_notifier;
+ rwlock_t line_state_lock;
struct workqueue_struct *line_state_wq;
struct blocking_notifier_head device_notifier;
struct srcu_struct srcu;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 95a05b03f799..c0ddbe7d6f0b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2555,7 +2555,6 @@ static int amdgpu_pmops_freeze(struct device *dev)
int r;
r = amdgpu_device_suspend(drm_dev, true);
- adev->in_s4 = false;
if (r)
return r;
@@ -2567,8 +2566,13 @@ static int amdgpu_pmops_freeze(struct device *dev)
static int amdgpu_pmops_thaw(struct device *dev)
{
struct drm_device *drm_dev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+ int r;
- return amdgpu_device_resume(drm_dev, true);
+ r = amdgpu_device_resume(drm_dev, true);
+ adev->in_s4 = false;
+
+ return r;
}
static int amdgpu_pmops_poweroff(struct device *dev)
@@ -2581,6 +2585,9 @@ static int amdgpu_pmops_poweroff(struct device *dev)
static int amdgpu_pmops_restore(struct device *dev)
{
struct drm_device *drm_dev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+
+ adev->in_s4 = false;
return amdgpu_device_resume(drm_dev, true);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
index b749f1c3f6a9..0fb88e6d5d54 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
@@ -528,8 +528,9 @@ static void gmc_v12_0_get_vm_pte(struct amdgpu_device *adev,
bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT;
- is_system = (bo->tbo.resource->mem_type == TTM_PL_TT) ||
- (bo->tbo.resource->mem_type == AMDGPU_PL_PREEMPT);
+ is_system = bo->tbo.resource &&
+ (bo->tbo.resource->mem_type == TTM_PL_TT ||
+ bo->tbo.resource->mem_type == AMDGPU_PL_PREEMPT);
if (bo && bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC)
*flags |= AMDGPU_PTE_DCC;
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
index c633b7ff2943..09fd6ef99b3d 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
@@ -284,7 +284,7 @@ static int vce_v2_0_stop(struct amdgpu_device *adev)
return 0;
}
- ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCN);
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE);
if (!ip_block)
return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index d4593374e7a1..34c2c42c0f95 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1230,11 +1230,13 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
decrement_queue_count(dqm, qpd, q);
if (dqm->dev->kfd->shared_resources.enable_mes) {
- retval = remove_queue_mes(dqm, q, qpd);
- if (retval) {
+ int err;
+
+ err = remove_queue_mes(dqm, q, qpd);
+ if (err) {
dev_err(dev, "Failed to evict queue %d\n",
q->properties.queue_id);
- goto out;
+ retval = err;
}
}
}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 9d9645a2d18e..74ad0d1240fe 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -245,6 +245,10 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
static void handle_hpd_irq_helper(struct amdgpu_dm_connector *aconnector);
static void handle_hpd_rx_irq(void *param);
+static void amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm,
+ int bl_idx,
+ u32 user_brightness);
+
static bool
is_timing_unchanged_for_freesync(struct drm_crtc_state *old_crtc_state,
struct drm_crtc_state *new_crtc_state);
@@ -3371,8 +3375,19 @@ static int dm_resume(struct amdgpu_ip_block *ip_block)
mutex_unlock(&dm->dc_lock);
+ /* set the backlight after a reset */
+ for (i = 0; i < dm->num_of_edps; i++) {
+ if (dm->backlight_dev[i])
+ amdgpu_dm_backlight_set_level(dm, i, dm->brightness[i]);
+ }
+
return 0;
}
+
+ /* leave display off for S4 sequence */
+ if (adev->in_s4)
+ return 0;
+
/* Recreate dc_state - DC invalidates it when setting power state to S3. */
dc_state_release(dm_state->context);
dm_state->context = dc_state_create(dm->dc, NULL);
@@ -4906,6 +4921,7 @@ amdgpu_dm_register_backlight_device(struct amdgpu_dm_connector *aconnector)
dm->backlight_dev[aconnector->bl_idx] =
backlight_device_register(bl_name, aconnector->base.kdev, dm,
&amdgpu_dm_backlight_ops, &props);
+ dm->brightness[aconnector->bl_idx] = props.brightness;
if (IS_ERR(dm->backlight_dev[aconnector->bl_idx])) {
DRM_ERROR("DM: Backlight registration failed!\n");
@@ -4973,7 +4989,6 @@ static void setup_backlight_device(struct amdgpu_display_manager *dm,
aconnector->bl_idx = bl_idx;
amdgpu_dm_update_backlight_caps(dm, bl_idx);
- dm->brightness[bl_idx] = AMDGPU_MAX_BL_LEVEL;
dm->backlight_link[bl_idx] = link;
dm->num_of_edps++;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
index e339c7a8d541..c0dc23244049 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
@@ -455,6 +455,7 @@ void hdcp_destroy(struct kobject *kobj, struct hdcp_workqueue *hdcp_work)
for (i = 0; i < hdcp_work->max_link; i++) {
cancel_delayed_work_sync(&hdcp_work[i].callback_dwork);
cancel_delayed_work_sync(&hdcp_work[i].watchdog_timer_dwork);
+ cancel_delayed_work_sync(&hdcp_work[i].property_validate_dwork);
}
sysfs_remove_bin_file(kobj, &hdcp_work[0].attr);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
index c4a7fd453e5f..a215234151ac 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
@@ -894,8 +894,16 @@ void amdgpu_dm_hpd_init(struct amdgpu_device *adev)
struct drm_device *dev = adev_to_drm(adev);
struct drm_connector *connector;
struct drm_connector_list_iter iter;
+ int irq_type;
int i;
+ /* First, clear all hpd and hpdrx interrupts */
+ for (i = DC_IRQ_SOURCE_HPD1; i <= DC_IRQ_SOURCE_HPD6RX; i++) {
+ if (!dc_interrupt_set(adev->dm.dc, i, false))
+ drm_err(dev, "Failed to clear hpd(rx) source=%d on init\n",
+ i);
+ }
+
drm_connector_list_iter_begin(dev, &iter);
drm_for_each_connector_iter(connector, &iter) {
struct amdgpu_dm_connector *amdgpu_dm_connector;
@@ -908,10 +916,31 @@ void amdgpu_dm_hpd_init(struct amdgpu_device *adev)
dc_link = amdgpu_dm_connector->dc_link;
+ /*
+ * Get a base driver irq reference for hpd ints for the lifetime
+ * of dm. Note that only hpd interrupt types are registered with
+ * base driver; hpd_rx types aren't. IOW, amdgpu_irq_get/put on
+ * hpd_rx isn't available. DM currently controls hpd_rx
+ * explicitly with dc_interrupt_set()
+ */
if (dc_link->irq_source_hpd != DC_IRQ_SOURCE_INVALID) {
- dc_interrupt_set(adev->dm.dc,
- dc_link->irq_source_hpd,
- true);
+ irq_type = dc_link->irq_source_hpd - DC_IRQ_SOURCE_HPD1;
+ /*
+ * TODO: There's a mismatch between mode_info.num_hpd
+ * and what bios reports as the # of connectors with hpd
+ * sources. Since the # of hpd source types registered
+ * with base driver == mode_info.num_hpd, we have to
+ * fallback to dc_interrupt_set for the remaining types.
+ */
+ if (irq_type < adev->mode_info.num_hpd) {
+ if (amdgpu_irq_get(adev, &adev->hpd_irq, irq_type))
+ drm_err(dev, "DM_IRQ: Failed get HPD for source=%d)!\n",
+ dc_link->irq_source_hpd);
+ } else {
+ dc_interrupt_set(adev->dm.dc,
+ dc_link->irq_source_hpd,
+ true);
+ }
}
if (dc_link->irq_source_hpd_rx != DC_IRQ_SOURCE_INVALID) {
@@ -921,12 +950,6 @@ void amdgpu_dm_hpd_init(struct amdgpu_device *adev)
}
}
drm_connector_list_iter_end(&iter);
-
- /* Update reference counts for HPDs */
- for (i = DC_IRQ_SOURCE_HPD1; i <= adev->mode_info.num_hpd; i++) {
- if (amdgpu_irq_get(adev, &adev->hpd_irq, i - DC_IRQ_SOURCE_HPD1))
- drm_err(dev, "DM_IRQ: Failed get HPD for source=%d)!\n", i);
- }
}
/**
@@ -942,7 +965,7 @@ void amdgpu_dm_hpd_fini(struct amdgpu_device *adev)
struct drm_device *dev = adev_to_drm(adev);
struct drm_connector *connector;
struct drm_connector_list_iter iter;
- int i;
+ int irq_type;
drm_connector_list_iter_begin(dev, &iter);
drm_for_each_connector_iter(connector, &iter) {
@@ -956,9 +979,18 @@ void amdgpu_dm_hpd_fini(struct amdgpu_device *adev)
dc_link = amdgpu_dm_connector->dc_link;
if (dc_link->irq_source_hpd != DC_IRQ_SOURCE_INVALID) {
- dc_interrupt_set(adev->dm.dc,
- dc_link->irq_source_hpd,
- false);
+ irq_type = dc_link->irq_source_hpd - DC_IRQ_SOURCE_HPD1;
+
+ /* TODO: See same TODO in amdgpu_dm_hpd_init() */
+ if (irq_type < adev->mode_info.num_hpd) {
+ if (amdgpu_irq_put(adev, &adev->hpd_irq, irq_type))
+ drm_err(dev, "DM_IRQ: Failed put HPD for source=%d!\n",
+ dc_link->irq_source_hpd);
+ } else {
+ dc_interrupt_set(adev->dm.dc,
+ dc_link->irq_source_hpd,
+ false);
+ }
}
if (dc_link->irq_source_hpd_rx != DC_IRQ_SOURCE_INVALID) {
@@ -968,10 +1000,4 @@ void amdgpu_dm_hpd_fini(struct amdgpu_device *adev)
}
}
drm_connector_list_iter_end(&iter);
-
- /* Update reference counts for HPDs */
- for (i = DC_IRQ_SOURCE_HPD1; i <= adev->mode_info.num_hpd; i++) {
- if (amdgpu_irq_put(adev, &adev->hpd_irq, i - DC_IRQ_SOURCE_HPD1))
- drm_err(dev, "DM_IRQ: Failed put HPD for source=%d!\n", i);
- }
}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index 774cc3f4f3fd..92472109f84a 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -277,8 +277,11 @@ static int amdgpu_dm_plane_validate_dcc(struct amdgpu_device *adev,
if (!dcc->enable)
return 0;
- if (format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN ||
- !dc->cap_funcs.get_dcc_compression_cap)
+ if (adev->family < AMDGPU_FAMILY_GC_12_0_0 &&
+ format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN)
+ return -EINVAL;
+
+ if (!dc->cap_funcs.get_dcc_compression_cap)
return -EINVAL;
input.format = format;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index a45037cb4cc0..298668e9729c 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -3389,10 +3389,13 @@ static int get_norm_pix_clk(const struct dc_crtc_timing *timing)
break;
case COLOR_DEPTH_121212:
normalized_pix_clk = (pix_clk * 36) / 24;
- break;
+ break;
+ case COLOR_DEPTH_141414:
+ normalized_pix_clk = (pix_clk * 42) / 24;
+ break;
case COLOR_DEPTH_161616:
normalized_pix_clk = (pix_clk * 48) / 24;
- break;
+ break;
default:
ASSERT(0);
break;
diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c
index e5fb0e8333e4..e691a1cf3356 100644
--- a/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c
+++ b/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c
@@ -239,6 +239,7 @@ static const struct timing_generator_funcs dce60_tg_funcs = {
dce60_timing_generator_enable_advanced_request,
.configure_crc = dce60_configure_crc,
.get_crc = dce110_get_crc,
+ .is_two_pixels_per_container = dce110_is_two_pixels_per_container,
};
void dce60_timing_generator_construct(
diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c
index 06c91c5b7f7c..6d09bef671da 100644
--- a/drivers/gpu/drm/display/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c
@@ -4025,6 +4025,22 @@ out:
return 0;
}
+static bool primary_mstb_probing_is_done(struct drm_dp_mst_topology_mgr *mgr)
+{
+ bool probing_done = false;
+
+ mutex_lock(&mgr->lock);
+
+ if (mgr->mst_primary && drm_dp_mst_topology_try_get_mstb(mgr->mst_primary)) {
+ probing_done = mgr->mst_primary->link_address_sent;
+ drm_dp_mst_topology_put_mstb(mgr->mst_primary);
+ }
+
+ mutex_unlock(&mgr->lock);
+
+ return probing_done;
+}
+
static inline bool
drm_dp_mst_process_up_req(struct drm_dp_mst_topology_mgr *mgr,
struct drm_dp_pending_up_req *up_req)
@@ -4055,8 +4071,12 @@ drm_dp_mst_process_up_req(struct drm_dp_mst_topology_mgr *mgr,
/* TODO: Add missing handler for DP_RESOURCE_STATUS_NOTIFY events */
if (msg->req_type == DP_CONNECTION_STATUS_NOTIFY) {
- dowork = drm_dp_mst_handle_conn_stat(mstb, &msg->u.conn_stat);
- hotplug = true;
+ if (!primary_mstb_probing_is_done(mgr)) {
+ drm_dbg_kms(mgr->dev, "Got CSN before finish topology probing. Skip it.\n");
+ } else {
+ dowork = drm_dp_mst_handle_conn_stat(mstb, &msg->u.conn_stat);
+ hotplug = true;
+ }
}
drm_dp_mst_topology_put_mstb(mstb);
@@ -4138,10 +4158,11 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr)
drm_dp_send_up_ack_reply(mgr, mst_primary, up_req->msg.req_type,
false);
+ drm_dp_mst_topology_put_mstb(mst_primary);
+
if (up_req->msg.req_type == DP_CONNECTION_STATUS_NOTIFY) {
const struct drm_dp_connection_status_notify *conn_stat =
&up_req->msg.u.conn_stat;
- bool handle_csn;
drm_dbg_kms(mgr->dev, "Got CSN: pn: %d ldps:%d ddps: %d mcs: %d ip: %d pdt: %d\n",
conn_stat->port_number,
@@ -4150,16 +4171,6 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr)
conn_stat->message_capability_status,
conn_stat->input_port,
conn_stat->peer_device_type);
-
- mutex_lock(&mgr->probe_lock);
- handle_csn = mst_primary->link_address_sent;
- mutex_unlock(&mgr->probe_lock);
-
- if (!handle_csn) {
- drm_dbg_kms(mgr->dev, "Got CSN before finish topology probing. Skip it.");
- kfree(up_req);
- goto out_put_primary;
- }
} else if (up_req->msg.req_type == DP_RESOURCE_STATUS_NOTIFY) {
const struct drm_dp_resource_status_notify *res_stat =
&up_req->msg.u.resource_stat;
@@ -4174,9 +4185,6 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr)
list_add_tail(&up_req->next, &mgr->up_req_list);
mutex_unlock(&mgr->up_req_lock);
queue_work(system_long_wq, &mgr->up_req_work);
-
-out_put_primary:
- drm_dp_mst_topology_put_mstb(mst_primary);
out_clear_reply:
reset_msg_rx_state(&mgr->up_req_recv);
return ret;
diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c
index 370dc676e3aa..fd36b8fd54e9 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -956,6 +956,10 @@ int drm_atomic_connector_commit_dpms(struct drm_atomic_state *state,
if (mode != DRM_MODE_DPMS_ON)
mode = DRM_MODE_DPMS_OFF;
+
+ if (connector->dpms == mode)
+ goto out;
+
connector->dpms = mode;
crtc = connector->state->crtc;
diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c
index 5f24d6b41cc6..48b08c9611a7 100644
--- a/drivers/gpu/drm/drm_connector.c
+++ b/drivers/gpu/drm/drm_connector.c
@@ -1427,6 +1427,10 @@ EXPORT_SYMBOL(drm_hdmi_connector_get_output_format_name);
* callback. For atomic drivers the remapping to the "ACTIVE" property is
* implemented in the DRM core.
*
+ * On atomic drivers any DPMS setproperty ioctl where the value does not
+ * change is completely skipped, otherwise a full atomic commit will occur.
+ * On legacy drivers the exact behavior is driver specific.
+ *
* Note that this property cannot be set through the MODE_ATOMIC ioctl,
* userspace must use "ACTIVE" on the CRTC instead.
*
diff --git a/drivers/gpu/drm/drm_panic_qr.rs b/drivers/gpu/drm/drm_panic_qr.rs
index bcf248f69252..6903e2010cb9 100644
--- a/drivers/gpu/drm/drm_panic_qr.rs
+++ b/drivers/gpu/drm/drm_panic_qr.rs
@@ -545,7 +545,7 @@ impl EncodedMsg<'_> {
}
self.push(&mut offset, (MODE_STOP, 4));
- let pad_offset = (offset + 7) / 8;
+ let pad_offset = offset.div_ceil(8);
for i in pad_offset..self.version.max_data() {
self.data[i] = PADDING[(i & 1) ^ (pad_offset & 1)];
}
@@ -659,7 +659,7 @@ struct QrImage<'a> {
impl QrImage<'_> {
fn new<'a, 'b>(em: &'b EncodedMsg<'b>, qrdata: &'a mut [u8]) -> QrImage<'a> {
let width = em.version.width();
- let stride = (width + 7) / 8;
+ let stride = width.div_ceil(8);
let data = qrdata;
let mut qr_image = QrImage {
@@ -911,16 +911,16 @@ impl QrImage<'_> {
///
/// * `url`: The base URL of the QR code. It will be encoded as Binary segment.
/// * `data`: A pointer to the binary data, to be encoded. if URL is NULL, it
-/// will be encoded as binary segment, otherwise it will be encoded
-/// efficiently as a numeric segment, and appended to the URL.
+/// will be encoded as binary segment, otherwise it will be encoded
+/// efficiently as a numeric segment, and appended to the URL.
/// * `data_len`: Length of the data, that needs to be encoded, must be less
-/// than data_size.
+/// than data_size.
/// * `data_size`: Size of data buffer, it should be at least 4071 bytes to hold
-/// a V40 QR code. It will then be overwritten with the QR code image.
+/// a V40 QR code. It will then be overwritten with the QR code image.
/// * `tmp`: A temporary buffer that the QR code encoder will use, to write the
-/// segments and ECC.
+/// segments and ECC.
/// * `tmp_size`: Size of the temporary buffer, it must be at least 3706 bytes
-/// long for V40.
+/// long for V40.
///
/// # Safety
///
diff --git a/drivers/gpu/drm/gma500/mid_bios.c b/drivers/gpu/drm/gma500/mid_bios.c
index 7e76790c6a81..cba97d7db131 100644
--- a/drivers/gpu/drm/gma500/mid_bios.c
+++ b/drivers/gpu/drm/gma500/mid_bios.c
@@ -279,6 +279,11 @@ static void mid_get_vbt_data(struct drm_psb_private *dev_priv)
0, PCI_DEVFN(2, 0));
int ret = -1;
+ if (pci_gfx_root == NULL) {
+ WARN_ON(1);
+ return;
+ }
+
/* Get the address of the platform config vbt */
pci_read_config_dword(pci_gfx_root, 0xFC, &addr);
pci_dev_put(pci_gfx_root);
diff --git a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c
index f59abfa7622a..0d49f168a919 100644
--- a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c
+++ b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c
@@ -154,6 +154,7 @@ static int hyperv_vmbus_probe(struct hv_device *hdev,
return 0;
err_free_mmio:
+ iounmap(hv->vram);
vmbus_free_mmio(hv->mem->start, hv->fb_size);
err_vmbus_close:
vmbus_close(hdev->channel);
@@ -172,6 +173,7 @@ static void hyperv_vmbus_remove(struct hv_device *hdev)
vmbus_close(hdev->channel);
hv_set_drvdata(hdev, NULL);
+ iounmap(hv->vram);
vmbus_free_mmio(hv->mem->start, hv->fb_size);
}
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 41128469f12a..c9dcf2bbd4c7 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -7830,9 +7830,6 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state)
intel_program_dpkgc_latency(state);
- if (state->modeset)
- intel_set_cdclk_post_plane_update(state);
-
intel_wait_for_vblank_workers(state);
/* FIXME: We should call drm_atomic_helper_commit_hw_done() here
@@ -7906,6 +7903,8 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state)
intel_verify_planes(state);
intel_sagv_post_plane_update(state);
+ if (state->modeset)
+ intel_set_cdclk_post_plane_update(state);
intel_pmdemand_post_plane_update(state);
drm_atomic_helper_commit_hw_done(&state->base);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index 21274aa9bddd..c3dabb857960 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -164,6 +164,9 @@ static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
* 4 - Support multiple fault handlers per object depending on object's
* backing storage (a.k.a. MMAP_OFFSET).
*
+ * 5 - Support multiple partial mmaps(mmap part of BO + unmap a offset, multiple
+ * times with different size and offset).
+ *
* Restrictions:
*
* * snoopable objects cannot be accessed via the GTT. It can cause machine
@@ -191,7 +194,7 @@ static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
*/
int i915_gem_mmap_gtt_version(void)
{
- return 4;
+ return 5;
}
static inline struct i915_gtt_view
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index df7f130fb663..b995d1d51aed 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -6,6 +6,7 @@
#include "xe_guc_pc.h"
#include <linux/delay.h>
+#include <linux/ktime.h>
#include <drm/drm_managed.h>
#include <generated/xe_wa_oob.h>
@@ -19,6 +20,7 @@
#include "xe_gt.h"
#include "xe_gt_idle.h"
#include "xe_gt_printk.h"
+#include "xe_gt_throttle.h"
#include "xe_gt_types.h"
#include "xe_guc.h"
#include "xe_guc_ct.h"
@@ -49,6 +51,9 @@
#define LNL_MERT_FREQ_CAP 800
#define BMG_MERT_FREQ_CAP 2133
+#define SLPC_RESET_TIMEOUT_MS 5 /* roughly 5ms, but no need for precision */
+#define SLPC_RESET_EXTENDED_TIMEOUT_MS 1000 /* To be used only at pc_start */
+
/**
* DOC: GuC Power Conservation (PC)
*
@@ -113,9 +118,10 @@ static struct iosys_map *pc_to_maps(struct xe_guc_pc *pc)
FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC, count))
static int wait_for_pc_state(struct xe_guc_pc *pc,
- enum slpc_global_state state)
+ enum slpc_global_state state,
+ int timeout_ms)
{
- int timeout_us = 5000; /* rought 5ms, but no need for precision */
+ int timeout_us = 1000 * timeout_ms;
int slept, wait = 10;
xe_device_assert_mem_access(pc_to_xe(pc));
@@ -164,7 +170,8 @@ static int pc_action_query_task_state(struct xe_guc_pc *pc)
};
int ret;
- if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING))
+ if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING,
+ SLPC_RESET_TIMEOUT_MS))
return -EAGAIN;
/* Blocking here to ensure the results are ready before reading them */
@@ -187,7 +194,8 @@ static int pc_action_set_param(struct xe_guc_pc *pc, u8 id, u32 value)
};
int ret;
- if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING))
+ if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING,
+ SLPC_RESET_TIMEOUT_MS))
return -EAGAIN;
ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
@@ -208,7 +216,8 @@ static int pc_action_unset_param(struct xe_guc_pc *pc, u8 id)
struct xe_guc_ct *ct = &pc_to_guc(pc)->ct;
int ret;
- if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING))
+ if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING,
+ SLPC_RESET_TIMEOUT_MS))
return -EAGAIN;
ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
@@ -440,6 +449,15 @@ u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc)
return freq;
}
+static u32 get_cur_freq(struct xe_gt *gt)
+{
+ u32 freq;
+
+ freq = xe_mmio_read32(&gt->mmio, RPNSWREQ);
+ freq = REG_FIELD_GET(REQ_RATIO_MASK, freq);
+ return decode_freq(freq);
+}
+
/**
* xe_guc_pc_get_cur_freq - Get Current requested frequency
* @pc: The GuC PC
@@ -463,10 +481,7 @@ int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq)
return -ETIMEDOUT;
}
- *freq = xe_mmio_read32(&gt->mmio, RPNSWREQ);
-
- *freq = REG_FIELD_GET(REQ_RATIO_MASK, *freq);
- *freq = decode_freq(*freq);
+ *freq = get_cur_freq(gt);
xe_force_wake_put(gt_to_fw(gt), fw_ref);
return 0;
@@ -1002,6 +1017,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
struct xe_gt *gt = pc_to_gt(pc);
u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data));
unsigned int fw_ref;
+ ktime_t earlier;
int ret;
xe_gt_assert(gt, xe_device_uc_enabled(xe));
@@ -1026,14 +1042,25 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
memset(pc->bo->vmap.vaddr, 0, size);
slpc_shared_data_write(pc, header.size, size);
+ earlier = ktime_get();
ret = pc_action_reset(pc);
if (ret)
goto out;
- if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) {
- xe_gt_err(gt, "GuC PC Start failed\n");
- ret = -EIO;
- goto out;
+ if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING,
+ SLPC_RESET_TIMEOUT_MS)) {
+ xe_gt_warn(gt, "GuC PC start taking longer than normal [freq = %dMHz (req = %dMHz), perf_limit_reasons = 0x%08X]\n",
+ xe_guc_pc_get_act_freq(pc), get_cur_freq(gt),
+ xe_gt_throttle_get_limit_reasons(gt));
+
+ if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING,
+ SLPC_RESET_EXTENDED_TIMEOUT_MS)) {
+ xe_gt_err(gt, "GuC PC Start failed: Dynamic GT frequency control and GT sleep states are now disabled.\n");
+ goto out;
+ }
+
+ xe_gt_warn(gt, "GuC PC excessive start time: %lldms",
+ ktime_ms_delta(ktime_get(), earlier));
}
ret = pc_init_freqs(pc);
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index b6a2dd742ebd..1a5fe4822a62 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -1246,11 +1246,11 @@ static void __guc_exec_queue_fini_async(struct work_struct *w)
xe_pm_runtime_get(guc_to_xe(guc));
trace_xe_exec_queue_destroy(q);
+ release_guc_id(guc, q);
if (xe_exec_queue_is_lr(q))
cancel_work_sync(&ge->lr_tdr);
/* Confirm no work left behind accessing device structures */
cancel_delayed_work_sync(&ge->sched.base.work_tdr);
- release_guc_id(guc, q);
xe_sched_entity_fini(&ge->entity);
xe_sched_fini(&ge->sched);
diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c
index 392102515f3d..c3cc0fa105e8 100644
--- a/drivers/gpu/drm/xe/xe_hmm.c
+++ b/drivers/gpu/drm/xe/xe_hmm.c
@@ -138,13 +138,17 @@ static int xe_build_sg(struct xe_device *xe, struct hmm_range *range,
i += size;
if (unlikely(j == st->nents - 1)) {
+ xe_assert(xe, i >= npages);
if (i > npages)
size -= (i - npages);
+
sg_mark_end(sgl);
+ } else {
+ xe_assert(xe, i < npages);
}
+
sg_set_page(sgl, page, size << PAGE_SHIFT, 0);
}
- xe_assert(xe, i == npages);
return dma_map_sgtable(dev, st, write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE,
DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING);
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index c9cc0c091dfd..89fd2c043136 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -267,6 +267,15 @@ int xe_pm_init_early(struct xe_device *xe)
}
ALLOW_ERROR_INJECTION(xe_pm_init_early, ERRNO); /* See xe_pci_probe() */
+static u32 vram_threshold_value(struct xe_device *xe)
+{
+ /* FIXME: D3Cold temporarily disabled by default on BMG */
+ if (xe->info.platform == XE_BATTLEMAGE)
+ return 0;
+
+ return DEFAULT_VRAM_THRESHOLD;
+}
+
/**
* xe_pm_init - Initialize Xe Power Management
* @xe: xe device instance
@@ -277,6 +286,7 @@ ALLOW_ERROR_INJECTION(xe_pm_init_early, ERRNO); /* See xe_pci_probe() */
*/
int xe_pm_init(struct xe_device *xe)
{
+ u32 vram_threshold;
int err;
/* For now suspend/resume is only allowed with GuC */
@@ -290,7 +300,8 @@ int xe_pm_init(struct xe_device *xe)
if (err)
return err;
- err = xe_pm_set_vram_threshold(xe, DEFAULT_VRAM_THRESHOLD);
+ vram_threshold = vram_threshold_value(xe);
+ err = xe_pm_set_vram_threshold(xe, vram_threshold);
if (err)
return err;
}
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index ec6ec18ab3fa..5956631c0d40 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1809,9 +1809,6 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
return -EINVAL;
- if (XE_IOCTL_DBG(xe, args->extensions))
- return -EINVAL;
-
if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
flags |= XE_VM_FLAG_SCRATCH_PAGE;
if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 0f6cd44fff29..6e55a1a2613d 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -2262,12 +2262,25 @@ void vmbus_free_mmio(resource_size_t start, resource_size_t size)
struct resource *iter;
mutex_lock(&hyperv_mmio_lock);
+
+ /*
+ * If all bytes of the MMIO range to be released are within the
+ * special case fb_mmio shadow region, skip releasing the shadow
+ * region since no corresponding __request_region() was done
+ * in vmbus_allocate_mmio().
+ */
+ if (fb_mmio && start >= fb_mmio->start &&
+ (start + size - 1 <= fb_mmio->end))
+ goto skip_shadow_release;
+
for (iter = hyperv_mmio; iter; iter = iter->sibling) {
if ((iter->start >= start + size) || (iter->end <= start))
continue;
__release_region(iter, start, size);
}
+
+skip_shadow_release:
release_mem_region(start, size);
mutex_unlock(&hyperv_mmio_lock);
diff --git a/drivers/i2c/busses/i2c-ali1535.c b/drivers/i2c/busses/i2c-ali1535.c
index 544c94e86b89..1eac35838040 100644
--- a/drivers/i2c/busses/i2c-ali1535.c
+++ b/drivers/i2c/busses/i2c-ali1535.c
@@ -485,6 +485,8 @@ MODULE_DEVICE_TABLE(pci, ali1535_ids);
static int ali1535_probe(struct pci_dev *dev, const struct pci_device_id *id)
{
+ int ret;
+
if (ali1535_setup(dev)) {
dev_warn(&dev->dev,
"ALI1535 not detected, module not inserted.\n");
@@ -496,7 +498,15 @@ static int ali1535_probe(struct pci_dev *dev, const struct pci_device_id *id)
snprintf(ali1535_adapter.name, sizeof(ali1535_adapter.name),
"SMBus ALI1535 adapter at %04x", ali1535_offset);
- return i2c_add_adapter(&ali1535_adapter);
+ ret = i2c_add_adapter(&ali1535_adapter);
+ if (ret)
+ goto release_region;
+
+ return 0;
+
+release_region:
+ release_region(ali1535_smba, ALI1535_SMB_IOSIZE);
+ return ret;
}
static void ali1535_remove(struct pci_dev *dev)
diff --git a/drivers/i2c/busses/i2c-ali15x3.c b/drivers/i2c/busses/i2c-ali15x3.c
index 4761c7208102..418d11266671 100644
--- a/drivers/i2c/busses/i2c-ali15x3.c
+++ b/drivers/i2c/busses/i2c-ali15x3.c
@@ -472,6 +472,8 @@ MODULE_DEVICE_TABLE (pci, ali15x3_ids);
static int ali15x3_probe(struct pci_dev *dev, const struct pci_device_id *id)
{
+ int ret;
+
if (ali15x3_setup(dev)) {
dev_err(&dev->dev,
"ALI15X3 not detected, module not inserted.\n");
@@ -483,7 +485,15 @@ static int ali15x3_probe(struct pci_dev *dev, const struct pci_device_id *id)
snprintf(ali15x3_adapter.name, sizeof(ali15x3_adapter.name),
"SMBus ALI15X3 adapter at %04x", ali15x3_smba);
- return i2c_add_adapter(&ali15x3_adapter);
+ ret = i2c_add_adapter(&ali15x3_adapter);
+ if (ret)
+ goto release_region;
+
+ return 0;
+
+release_region:
+ release_region(ali15x3_smba, ALI15X3_SMB_IOSIZE);
+ return ret;
}
static void ali15x3_remove(struct pci_dev *dev)
diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
index 92faf03d64cf..f18c3e74b076 100644
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c
@@ -1048,23 +1048,6 @@ static int omap_i2c_transmit_data(struct omap_i2c_dev *omap, u8 num_bytes,
return 0;
}
-static irqreturn_t
-omap_i2c_isr(int irq, void *dev_id)
-{
- struct omap_i2c_dev *omap = dev_id;
- irqreturn_t ret = IRQ_HANDLED;
- u16 mask;
- u16 stat;
-
- stat = omap_i2c_read_reg(omap, OMAP_I2C_STAT_REG);
- mask = omap_i2c_read_reg(omap, OMAP_I2C_IE_REG) & ~OMAP_I2C_STAT_NACK;
-
- if (stat & mask)
- ret = IRQ_WAKE_THREAD;
-
- return ret;
-}
-
static int omap_i2c_xfer_data(struct omap_i2c_dev *omap)
{
u16 bits;
@@ -1095,8 +1078,13 @@ static int omap_i2c_xfer_data(struct omap_i2c_dev *omap)
}
if (stat & OMAP_I2C_STAT_NACK) {
- err |= OMAP_I2C_STAT_NACK;
+ omap->cmd_err |= OMAP_I2C_STAT_NACK;
omap_i2c_ack_stat(omap, OMAP_I2C_STAT_NACK);
+
+ if (!(stat & ~OMAP_I2C_STAT_NACK)) {
+ err = -EAGAIN;
+ break;
+ }
}
if (stat & OMAP_I2C_STAT_AL) {
@@ -1472,7 +1460,7 @@ omap_i2c_probe(struct platform_device *pdev)
IRQF_NO_SUSPEND, pdev->name, omap);
else
r = devm_request_threaded_irq(&pdev->dev, omap->irq,
- omap_i2c_isr, omap_i2c_isr_thread,
+ NULL, omap_i2c_isr_thread,
IRQF_NO_SUSPEND | IRQF_ONESHOT,
pdev->name, omap);
diff --git a/drivers/i2c/busses/i2c-sis630.c b/drivers/i2c/busses/i2c-sis630.c
index 3505cf29cedd..a19c3d251804 100644
--- a/drivers/i2c/busses/i2c-sis630.c
+++ b/drivers/i2c/busses/i2c-sis630.c
@@ -509,6 +509,8 @@ MODULE_DEVICE_TABLE(pci, sis630_ids);
static int sis630_probe(struct pci_dev *dev, const struct pci_device_id *id)
{
+ int ret;
+
if (sis630_setup(dev)) {
dev_err(&dev->dev,
"SIS630 compatible bus not detected, "
@@ -522,7 +524,15 @@ static int sis630_probe(struct pci_dev *dev, const struct pci_device_id *id)
snprintf(sis630_adapter.name, sizeof(sis630_adapter.name),
"SMBus SIS630 adapter at %04x", smbus_base + SMB_STS);
- return i2c_add_adapter(&sis630_adapter);
+ ret = i2c_add_adapter(&sis630_adapter);
+ if (ret)
+ goto release_region;
+
+ return 0;
+
+release_region:
+ release_region(smbus_base + SMB_STS, SIS630_SMB_IOREGION);
+ return ret;
}
static void sis630_remove(struct pci_dev *dev)
diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index 8fe2a51df649..c33e6f33265b 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -140,6 +140,7 @@ static const struct xpad_device {
{ 0x044f, 0x0f00, "Thrustmaster Wheel", 0, XTYPE_XBOX },
{ 0x044f, 0x0f03, "Thrustmaster Wheel", 0, XTYPE_XBOX },
{ 0x044f, 0x0f07, "Thrustmaster, Inc. Controller", 0, XTYPE_XBOX },
+ { 0x044f, 0xd01e, "ThrustMaster, Inc. ESWAP X 2 ELDEN RING EDITION", 0, XTYPE_XBOXONE },
{ 0x044f, 0x0f10, "Thrustmaster Modena GT Wheel", 0, XTYPE_XBOX },
{ 0x044f, 0xb326, "Thrustmaster Gamepad GP XID", 0, XTYPE_XBOX360 },
{ 0x045e, 0x0202, "Microsoft X-Box pad v1 (US)", 0, XTYPE_XBOX },
@@ -177,6 +178,7 @@ static const struct xpad_device {
{ 0x06a3, 0x0200, "Saitek Racing Wheel", 0, XTYPE_XBOX },
{ 0x06a3, 0x0201, "Saitek Adrenalin", 0, XTYPE_XBOX },
{ 0x06a3, 0xf51a, "Saitek P3600", 0, XTYPE_XBOX360 },
+ { 0x0738, 0x4503, "Mad Catz Racing Wheel", 0, XTYPE_XBOXONE },
{ 0x0738, 0x4506, "Mad Catz 4506 Wireless Controller", 0, XTYPE_XBOX },
{ 0x0738, 0x4516, "Mad Catz Control Pad", 0, XTYPE_XBOX },
{ 0x0738, 0x4520, "Mad Catz Control Pad Pro", 0, XTYPE_XBOX },
@@ -238,6 +240,7 @@ static const struct xpad_device {
{ 0x0e6f, 0x0146, "Rock Candy Wired Controller for Xbox One", 0, XTYPE_XBOXONE },
{ 0x0e6f, 0x0147, "PDP Marvel Xbox One Controller", 0, XTYPE_XBOXONE },
{ 0x0e6f, 0x015c, "PDP Xbox One Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE },
+ { 0x0e6f, 0x015d, "PDP Mirror's Edge Official Wired Controller for Xbox One", XTYPE_XBOXONE },
{ 0x0e6f, 0x0161, "PDP Xbox One Controller", 0, XTYPE_XBOXONE },
{ 0x0e6f, 0x0162, "PDP Xbox One Controller", 0, XTYPE_XBOXONE },
{ 0x0e6f, 0x0163, "PDP Xbox One Controller", 0, XTYPE_XBOXONE },
@@ -276,12 +279,15 @@ static const struct xpad_device {
{ 0x0f0d, 0x0078, "Hori Real Arcade Pro V Kai Xbox One", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE },
{ 0x0f0d, 0x00c5, "Hori Fighting Commander ONE", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE },
{ 0x0f0d, 0x00dc, "HORIPAD FPS for Nintendo Switch", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
+ { 0x0f0d, 0x0151, "Hori Racing Wheel Overdrive for Xbox Series X", 0, XTYPE_XBOXONE },
+ { 0x0f0d, 0x0152, "Hori Racing Wheel Overdrive for Xbox Series X", 0, XTYPE_XBOXONE },
{ 0x0f30, 0x010b, "Philips Recoil", 0, XTYPE_XBOX },
{ 0x0f30, 0x0202, "Joytech Advanced Controller", 0, XTYPE_XBOX },
{ 0x0f30, 0x8888, "BigBen XBMiniPad Controller", 0, XTYPE_XBOX },
{ 0x102c, 0xff0c, "Joytech Wireless Advanced Controller", 0, XTYPE_XBOX },
{ 0x1038, 0x1430, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 },
{ 0x1038, 0x1431, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 },
+ { 0x10f5, 0x7005, "Turtle Beach Recon Controller", 0, XTYPE_XBOXONE },
{ 0x11c9, 0x55f0, "Nacon GC-100XF", 0, XTYPE_XBOX360 },
{ 0x11ff, 0x0511, "PXN V900", 0, XTYPE_XBOX360 },
{ 0x1209, 0x2882, "Ardwiino Controller", 0, XTYPE_XBOX360 },
@@ -306,7 +312,7 @@ static const struct xpad_device {
{ 0x1689, 0xfe00, "Razer Sabertooth", 0, XTYPE_XBOX360 },
{ 0x17ef, 0x6182, "Lenovo Legion Controller for Windows", 0, XTYPE_XBOX360 },
{ 0x1949, 0x041a, "Amazon Game Controller", 0, XTYPE_XBOX360 },
- { 0x1a86, 0xe310, "QH Electronics Controller", 0, XTYPE_XBOX360 },
+ { 0x1a86, 0xe310, "Legion Go S", 0, XTYPE_XBOX360 },
{ 0x1bad, 0x0002, "Harmonix Rock Band Guitar", 0, XTYPE_XBOX360 },
{ 0x1bad, 0x0003, "Harmonix Rock Band Drumkit", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 },
{ 0x1bad, 0x0130, "Ion Drum Rocker", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 },
@@ -343,6 +349,7 @@ static const struct xpad_device {
{ 0x1bad, 0xfa01, "MadCatz GamePad", 0, XTYPE_XBOX360 },
{ 0x1bad, 0xfd00, "Razer Onza TE", 0, XTYPE_XBOX360 },
{ 0x1bad, 0xfd01, "Razer Onza", 0, XTYPE_XBOX360 },
+ { 0x1ee9, 0x1590, "ZOTAC Gaming Zone", 0, XTYPE_XBOX360 },
{ 0x20d6, 0x2001, "BDA Xbox Series X Wired Controller", 0, XTYPE_XBOXONE },
{ 0x20d6, 0x2009, "PowerA Enhanced Wired Controller for Xbox Series X|S", 0, XTYPE_XBOXONE },
{ 0x20d6, 0x281f, "PowerA Wired Controller For Xbox 360", 0, XTYPE_XBOX360 },
@@ -366,6 +373,7 @@ static const struct xpad_device {
{ 0x24c6, 0x5510, "Hori Fighting Commander ONE (Xbox 360/PC Mode)", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
{ 0x24c6, 0x551a, "PowerA FUSION Pro Controller", 0, XTYPE_XBOXONE },
{ 0x24c6, 0x561a, "PowerA FUSION Controller", 0, XTYPE_XBOXONE },
+ { 0x24c6, 0x581a, "ThrustMaster XB1 Classic Controller", 0, XTYPE_XBOXONE },
{ 0x24c6, 0x5b00, "ThrustMaster Ferrari 458 Racing Wheel", 0, XTYPE_XBOX360 },
{ 0x24c6, 0x5b02, "Thrustmaster, Inc. GPX Controller", 0, XTYPE_XBOX360 },
{ 0x24c6, 0x5b03, "Thrustmaster Ferrari 458 Racing Wheel", 0, XTYPE_XBOX360 },
@@ -374,10 +382,15 @@ static const struct xpad_device {
{ 0x2563, 0x058d, "OneXPlayer Gamepad", 0, XTYPE_XBOX360 },
{ 0x294b, 0x3303, "Snakebyte GAMEPAD BASE X", 0, XTYPE_XBOXONE },
{ 0x294b, 0x3404, "Snakebyte GAMEPAD RGB X", 0, XTYPE_XBOXONE },
+ { 0x2993, 0x2001, "TECNO Pocket Go", 0, XTYPE_XBOX360 },
{ 0x2dc8, 0x2000, "8BitDo Pro 2 Wired Controller fox Xbox", 0, XTYPE_XBOXONE },
{ 0x2dc8, 0x3106, "8BitDo Ultimate Wireless / Pro 2 Wired Controller", 0, XTYPE_XBOX360 },
+ { 0x2dc8, 0x3109, "8BitDo Ultimate Wireless Bluetooth", 0, XTYPE_XBOX360 },
{ 0x2dc8, 0x310a, "8BitDo Ultimate 2C Wireless Controller", 0, XTYPE_XBOX360 },
+ { 0x2dc8, 0x6001, "8BitDo SN30 Pro", 0, XTYPE_XBOX360 },
{ 0x2e24, 0x0652, "Hyperkin Duke X-Box One pad", 0, XTYPE_XBOXONE },
+ { 0x2e24, 0x1688, "Hyperkin X91 X-Box One pad", 0, XTYPE_XBOXONE },
+ { 0x2e95, 0x0504, "SCUF Gaming Controller", MAP_SELECT_BUTTON, XTYPE_XBOXONE },
{ 0x31e3, 0x1100, "Wooting One", 0, XTYPE_XBOX360 },
{ 0x31e3, 0x1200, "Wooting Two", 0, XTYPE_XBOX360 },
{ 0x31e3, 0x1210, "Wooting Lekker", 0, XTYPE_XBOX360 },
@@ -385,11 +398,16 @@ static const struct xpad_device {
{ 0x31e3, 0x1230, "Wooting Two HE (ARM)", 0, XTYPE_XBOX360 },
{ 0x31e3, 0x1300, "Wooting 60HE (AVR)", 0, XTYPE_XBOX360 },
{ 0x31e3, 0x1310, "Wooting 60HE (ARM)", 0, XTYPE_XBOX360 },
+ { 0x3285, 0x0603, "Nacon Pro Compact controller for Xbox", 0, XTYPE_XBOXONE },
{ 0x3285, 0x0607, "Nacon GC-100", 0, XTYPE_XBOX360 },
+ { 0x3285, 0x0614, "Nacon Pro Compact", 0, XTYPE_XBOXONE },
{ 0x3285, 0x0646, "Nacon Pro Compact", 0, XTYPE_XBOXONE },
+ { 0x3285, 0x0662, "Nacon Revolution5 Pro", 0, XTYPE_XBOX360 },
{ 0x3285, 0x0663, "Nacon Evol-X", 0, XTYPE_XBOXONE },
{ 0x3537, 0x1004, "GameSir T4 Kaleid", 0, XTYPE_XBOX360 },
+ { 0x3537, 0x1010, "GameSir G7 SE", 0, XTYPE_XBOXONE },
{ 0x3767, 0x0101, "Fanatec Speedster 3 Forceshock Wheel", 0, XTYPE_XBOX },
+ { 0x413d, 0x2104, "Black Shark Green Ghost Gamepad", 0, XTYPE_XBOX360 },
{ 0xffff, 0xffff, "Chinese-made Xbox Controller", 0, XTYPE_XBOX },
{ 0x0000, 0x0000, "Generic X-Box pad", 0, XTYPE_UNKNOWN }
};
@@ -488,6 +506,7 @@ static const struct usb_device_id xpad_table[] = {
XPAD_XBOX360_VENDOR(0x03f0), /* HP HyperX Xbox 360 controllers */
XPAD_XBOXONE_VENDOR(0x03f0), /* HP HyperX Xbox One controllers */
XPAD_XBOX360_VENDOR(0x044f), /* Thrustmaster Xbox 360 controllers */
+ XPAD_XBOXONE_VENDOR(0x044f), /* Thrustmaster Xbox One controllers */
XPAD_XBOX360_VENDOR(0x045e), /* Microsoft Xbox 360 controllers */
XPAD_XBOXONE_VENDOR(0x045e), /* Microsoft Xbox One controllers */
XPAD_XBOX360_VENDOR(0x046d), /* Logitech Xbox 360-style controllers */
@@ -519,8 +538,9 @@ static const struct usb_device_id xpad_table[] = {
XPAD_XBOX360_VENDOR(0x1689), /* Razer Onza */
XPAD_XBOX360_VENDOR(0x17ef), /* Lenovo */
XPAD_XBOX360_VENDOR(0x1949), /* Amazon controllers */
- XPAD_XBOX360_VENDOR(0x1a86), /* QH Electronics */
+ XPAD_XBOX360_VENDOR(0x1a86), /* Nanjing Qinheng Microelectronics (WCH) */
XPAD_XBOX360_VENDOR(0x1bad), /* Harmonix Rock Band guitar and drums */
+ XPAD_XBOX360_VENDOR(0x1ee9), /* ZOTAC Technology Limited */
XPAD_XBOX360_VENDOR(0x20d6), /* PowerA controllers */
XPAD_XBOXONE_VENDOR(0x20d6), /* PowerA controllers */
XPAD_XBOX360_VENDOR(0x2345), /* Machenike Controllers */
@@ -528,17 +548,20 @@ static const struct usb_device_id xpad_table[] = {
XPAD_XBOXONE_VENDOR(0x24c6), /* PowerA controllers */
XPAD_XBOX360_VENDOR(0x2563), /* OneXPlayer Gamepad */
XPAD_XBOX360_VENDOR(0x260d), /* Dareu H101 */
- XPAD_XBOXONE_VENDOR(0x294b), /* Snakebyte */
+ XPAD_XBOXONE_VENDOR(0x294b), /* Snakebyte */
+ XPAD_XBOX360_VENDOR(0x2993), /* TECNO Mobile */
XPAD_XBOX360_VENDOR(0x2c22), /* Qanba Controllers */
- XPAD_XBOX360_VENDOR(0x2dc8), /* 8BitDo Pro 2 Wired Controller */
- XPAD_XBOXONE_VENDOR(0x2dc8), /* 8BitDo Pro 2 Wired Controller for Xbox */
- XPAD_XBOXONE_VENDOR(0x2e24), /* Hyperkin Duke Xbox One pad */
- XPAD_XBOX360_VENDOR(0x2f24), /* GameSir controllers */
+ XPAD_XBOX360_VENDOR(0x2dc8), /* 8BitDo Controllers */
+ XPAD_XBOXONE_VENDOR(0x2dc8), /* 8BitDo Controllers */
+ XPAD_XBOXONE_VENDOR(0x2e24), /* Hyperkin Controllers */
+ XPAD_XBOX360_VENDOR(0x2f24), /* GameSir Controllers */
+ XPAD_XBOXONE_VENDOR(0x2e95), /* SCUF Gaming Controller */
XPAD_XBOX360_VENDOR(0x31e3), /* Wooting Keyboards */
XPAD_XBOX360_VENDOR(0x3285), /* Nacon GC-100 */
XPAD_XBOXONE_VENDOR(0x3285), /* Nacon Evol-X */
XPAD_XBOX360_VENDOR(0x3537), /* GameSir Controllers */
XPAD_XBOXONE_VENDOR(0x3537), /* GameSir Controllers */
+ XPAD_XBOX360_VENDOR(0x413d), /* Black Shark Green Ghost Controller */
{ }
};
@@ -691,7 +714,9 @@ static const struct xboxone_init_packet xboxone_init_packets[] = {
XBOXONE_INIT_PKT(0x045e, 0x0b00, xboxone_s_init),
XBOXONE_INIT_PKT(0x045e, 0x0b00, extra_input_packet_init),
XBOXONE_INIT_PKT(0x0e6f, 0x0000, xboxone_pdp_led_on),
+ XBOXONE_INIT_PKT(0x20d6, 0xa01a, xboxone_pdp_led_on),
XBOXONE_INIT_PKT(0x0e6f, 0x0000, xboxone_pdp_auth),
+ XBOXONE_INIT_PKT(0x20d6, 0xa01a, xboxone_pdp_auth),
XBOXONE_INIT_PKT(0x24c6, 0x541a, xboxone_rumblebegin_init),
XBOXONE_INIT_PKT(0x24c6, 0x542a, xboxone_rumblebegin_init),
XBOXONE_INIT_PKT(0x24c6, 0x543a, xboxone_rumblebegin_init),
diff --git a/drivers/input/misc/iqs7222.c b/drivers/input/misc/iqs7222.c
index 22022d11470d..80b917944b51 100644
--- a/drivers/input/misc/iqs7222.c
+++ b/drivers/input/misc/iqs7222.c
@@ -100,11 +100,11 @@ enum iqs7222_reg_key_id {
enum iqs7222_reg_grp_id {
IQS7222_REG_GRP_STAT,
- IQS7222_REG_GRP_FILT,
IQS7222_REG_GRP_CYCLE,
IQS7222_REG_GRP_GLBL,
IQS7222_REG_GRP_BTN,
IQS7222_REG_GRP_CHAN,
+ IQS7222_REG_GRP_FILT,
IQS7222_REG_GRP_SLDR,
IQS7222_REG_GRP_TPAD,
IQS7222_REG_GRP_GPIO,
@@ -286,6 +286,7 @@ static const struct iqs7222_event_desc iqs7222_tp_events[] = {
struct iqs7222_reg_grp_desc {
u16 base;
+ u16 val_len;
int num_row;
int num_col;
};
@@ -342,6 +343,7 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = {
},
[IQS7222_REG_GRP_FILT] = {
.base = 0xAC00,
+ .val_len = 3,
.num_row = 1,
.num_col = 2,
},
@@ -400,6 +402,7 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = {
},
[IQS7222_REG_GRP_FILT] = {
.base = 0xAC00,
+ .val_len = 3,
.num_row = 1,
.num_col = 2,
},
@@ -454,6 +457,7 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = {
},
[IQS7222_REG_GRP_FILT] = {
.base = 0xC400,
+ .val_len = 3,
.num_row = 1,
.num_col = 2,
},
@@ -496,6 +500,7 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = {
},
[IQS7222_REG_GRP_FILT] = {
.base = 0xC400,
+ .val_len = 3,
.num_row = 1,
.num_col = 2,
},
@@ -543,6 +548,7 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = {
},
[IQS7222_REG_GRP_FILT] = {
.base = 0xAA00,
+ .val_len = 3,
.num_row = 1,
.num_col = 2,
},
@@ -600,6 +606,7 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = {
},
[IQS7222_REG_GRP_FILT] = {
.base = 0xAA00,
+ .val_len = 3,
.num_row = 1,
.num_col = 2,
},
@@ -656,6 +663,7 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = {
},
[IQS7222_REG_GRP_FILT] = {
.base = 0xAE00,
+ .val_len = 3,
.num_row = 1,
.num_col = 2,
},
@@ -712,6 +720,7 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = {
},
[IQS7222_REG_GRP_FILT] = {
.base = 0xAE00,
+ .val_len = 3,
.num_row = 1,
.num_col = 2,
},
@@ -768,6 +777,7 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = {
},
[IQS7222_REG_GRP_FILT] = {
.base = 0xAE00,
+ .val_len = 3,
.num_row = 1,
.num_col = 2,
},
@@ -1604,7 +1614,7 @@ static int iqs7222_force_comms(struct iqs7222_private *iqs7222)
}
static int iqs7222_read_burst(struct iqs7222_private *iqs7222,
- u16 reg, void *val, u16 num_val)
+ u16 reg, void *val, u16 val_len)
{
u8 reg_buf[sizeof(__be16)];
int ret, i;
@@ -1619,7 +1629,7 @@ static int iqs7222_read_burst(struct iqs7222_private *iqs7222,
{
.addr = client->addr,
.flags = I2C_M_RD,
- .len = num_val * sizeof(__le16),
+ .len = val_len,
.buf = (u8 *)val,
},
};
@@ -1675,7 +1685,7 @@ static int iqs7222_read_word(struct iqs7222_private *iqs7222, u16 reg, u16 *val)
__le16 val_buf;
int error;
- error = iqs7222_read_burst(iqs7222, reg, &val_buf, 1);
+ error = iqs7222_read_burst(iqs7222, reg, &val_buf, sizeof(val_buf));
if (error)
return error;
@@ -1685,10 +1695,9 @@ static int iqs7222_read_word(struct iqs7222_private *iqs7222, u16 reg, u16 *val)
}
static int iqs7222_write_burst(struct iqs7222_private *iqs7222,
- u16 reg, const void *val, u16 num_val)
+ u16 reg, const void *val, u16 val_len)
{
int reg_len = reg > U8_MAX ? sizeof(reg) : sizeof(u8);
- int val_len = num_val * sizeof(__le16);
int msg_len = reg_len + val_len;
int ret, i;
struct i2c_client *client = iqs7222->client;
@@ -1747,7 +1756,7 @@ static int iqs7222_write_word(struct iqs7222_private *iqs7222, u16 reg, u16 val)
{
__le16 val_buf = cpu_to_le16(val);
- return iqs7222_write_burst(iqs7222, reg, &val_buf, 1);
+ return iqs7222_write_burst(iqs7222, reg, &val_buf, sizeof(val_buf));
}
static int iqs7222_ati_trigger(struct iqs7222_private *iqs7222)
@@ -1831,30 +1840,14 @@ static int iqs7222_dev_init(struct iqs7222_private *iqs7222, int dir)
/*
* Acknowledge reset before writing any registers in case the device
- * suffers a spurious reset during initialization. Because this step
- * may change the reserved fields of the second filter beta register,
- * its cache must be updated.
- *
- * Writing the second filter beta register, in turn, may clobber the
- * system status register. As such, the filter beta register pair is
- * written first to protect against this hazard.
+ * suffers a spurious reset during initialization.
*/
if (dir == WRITE) {
- u16 reg = dev_desc->reg_grps[IQS7222_REG_GRP_FILT].base + 1;
- u16 filt_setup;
-
error = iqs7222_write_word(iqs7222, IQS7222_SYS_SETUP,
iqs7222->sys_setup[0] |
IQS7222_SYS_SETUP_ACK_RESET);
if (error)
return error;
-
- error = iqs7222_read_word(iqs7222, reg, &filt_setup);
- if (error)
- return error;
-
- iqs7222->filt_setup[1] &= GENMASK(7, 0);
- iqs7222->filt_setup[1] |= (filt_setup & ~GENMASK(7, 0));
}
/*
@@ -1883,6 +1876,7 @@ static int iqs7222_dev_init(struct iqs7222_private *iqs7222, int dir)
int num_col = dev_desc->reg_grps[i].num_col;
u16 reg = dev_desc->reg_grps[i].base;
__le16 *val_buf;
+ u16 val_len = dev_desc->reg_grps[i].val_len ? : num_col * sizeof(*val_buf);
u16 *val;
if (!num_col)
@@ -1900,7 +1894,7 @@ static int iqs7222_dev_init(struct iqs7222_private *iqs7222, int dir)
switch (dir) {
case READ:
error = iqs7222_read_burst(iqs7222, reg,
- val_buf, num_col);
+ val_buf, val_len);
for (k = 0; k < num_col; k++)
val[k] = le16_to_cpu(val_buf[k]);
break;
@@ -1909,7 +1903,7 @@ static int iqs7222_dev_init(struct iqs7222_private *iqs7222, int dir)
for (k = 0; k < num_col; k++)
val_buf[k] = cpu_to_le16(val[k]);
error = iqs7222_write_burst(iqs7222, reg,
- val_buf, num_col);
+ val_buf, val_len);
break;
default:
@@ -1962,7 +1956,7 @@ static int iqs7222_dev_info(struct iqs7222_private *iqs7222)
int error, i;
error = iqs7222_read_burst(iqs7222, IQS7222_PROD_NUM, dev_id,
- ARRAY_SIZE(dev_id));
+ sizeof(dev_id));
if (error)
return error;
@@ -2915,7 +2909,7 @@ static int iqs7222_report(struct iqs7222_private *iqs7222)
__le16 status[IQS7222_MAX_COLS_STAT];
error = iqs7222_read_burst(iqs7222, IQS7222_SYS_STATUS, status,
- num_stat);
+ num_stat * sizeof(*status));
if (error)
return error;
diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h
index 127cfdc8668a..6ed9fc34948c 100644
--- a/drivers/input/serio/i8042-acpipnpio.h
+++ b/drivers/input/serio/i8042-acpipnpio.h
@@ -1080,16 +1080,14 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
DMI_MATCH(DMI_BOARD_VENDOR, "TUXEDO"),
DMI_MATCH(DMI_BOARD_NAME, "AURA1501"),
},
- .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
- SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE)
},
{
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "TUXEDO"),
DMI_MATCH(DMI_BOARD_NAME, "EDUBOOK1502"),
},
- .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
- SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE)
},
{
/* Mivvy M310 */
@@ -1159,9 +1157,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
},
/*
* A lot of modern Clevo barebones have touchpad and/or keyboard issues
- * after suspend fixable with nomux + reset + noloop + nopnp. Luckily,
- * none of them have an external PS/2 port so this can safely be set for
- * all of them.
+ * after suspend fixable with the forcenorestore quirk.
* Clevo barebones come with board_vendor and/or system_vendor set to
* either the very generic string "Notebook" and/or a different value
* for each individual reseller. The only somewhat universal way to
@@ -1171,29 +1167,25 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
.matches = {
DMI_MATCH(DMI_BOARD_NAME, "LAPQC71A"),
},
- .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
- SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE)
},
{
.matches = {
DMI_MATCH(DMI_BOARD_NAME, "LAPQC71B"),
},
- .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
- SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE)
},
{
.matches = {
DMI_MATCH(DMI_BOARD_NAME, "N140CU"),
},
- .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
- SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE)
},
{
.matches = {
DMI_MATCH(DMI_BOARD_NAME, "N141CU"),
},
- .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
- SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE)
},
{
.matches = {
@@ -1205,29 +1197,19 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
.matches = {
DMI_MATCH(DMI_BOARD_NAME, "NH5xAx"),
},
- .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
- SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE)
},
{
- /*
- * Setting SERIO_QUIRK_NOMUX or SERIO_QUIRK_RESET_ALWAYS makes
- * the keyboard very laggy for ~5 seconds after boot and
- * sometimes also after resume.
- * However both are required for the keyboard to not fail
- * completely sometimes after boot or resume.
- */
.matches = {
DMI_MATCH(DMI_BOARD_NAME, "NHxxRZQ"),
},
- .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
- SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE)
},
{
.matches = {
DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"),
},
- .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
- SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE)
},
/*
* At least one modern Clevo barebone has the touchpad connected both
@@ -1243,17 +1225,15 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
.matches = {
DMI_MATCH(DMI_BOARD_NAME, "NS50MU"),
},
- .driver_data = (void *)(SERIO_QUIRK_NOAUX | SERIO_QUIRK_NOMUX |
- SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP |
- SERIO_QUIRK_NOPNP)
+ .driver_data = (void *)(SERIO_QUIRK_NOAUX |
+ SERIO_QUIRK_FORCENORESTORE)
},
{
.matches = {
DMI_MATCH(DMI_BOARD_NAME, "NS50_70MU"),
},
- .driver_data = (void *)(SERIO_QUIRK_NOAUX | SERIO_QUIRK_NOMUX |
- SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP |
- SERIO_QUIRK_NOPNP)
+ .driver_data = (void *)(SERIO_QUIRK_NOAUX |
+ SERIO_QUIRK_FORCENORESTORE)
},
{
.matches = {
@@ -1265,8 +1245,13 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
.matches = {
DMI_MATCH(DMI_BOARD_NAME, "NJ50_70CU"),
},
- .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
- SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE)
+ },
+ {
+ .matches = {
+ DMI_MATCH(DMI_BOARD_NAME, "P640RE"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE)
},
{
/*
@@ -1277,16 +1262,14 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
.matches = {
DMI_MATCH(DMI_PRODUCT_NAME, "P65xH"),
},
- .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
- SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE)
},
{
/* Clevo P650RS, 650RP6, Sager NP8152-S, and others */
.matches = {
DMI_MATCH(DMI_PRODUCT_NAME, "P65xRP"),
},
- .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
- SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE)
},
{
/*
@@ -1297,8 +1280,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
.matches = {
DMI_MATCH(DMI_PRODUCT_NAME, "P65_P67H"),
},
- .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
- SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE)
},
{
/*
@@ -1309,8 +1291,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
.matches = {
DMI_MATCH(DMI_PRODUCT_NAME, "P65_67RP"),
},
- .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
- SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE)
},
{
/*
@@ -1321,8 +1302,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
.matches = {
DMI_MATCH(DMI_PRODUCT_NAME, "P65_67RS"),
},
- .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
- SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE)
},
{
/*
@@ -1333,22 +1313,43 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
.matches = {
DMI_MATCH(DMI_PRODUCT_NAME, "P67xRP"),
},
- .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
- SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE)
},
{
.matches = {
DMI_MATCH(DMI_BOARD_NAME, "PB50_70DFx,DDx"),
},
- .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
- SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE)
+ },
+ {
+ .matches = {
+ DMI_MATCH(DMI_BOARD_NAME, "PB51RF"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE)
+ },
+ {
+ .matches = {
+ DMI_MATCH(DMI_BOARD_NAME, "PB71RD"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE)
+ },
+ {
+ .matches = {
+ DMI_MATCH(DMI_BOARD_NAME, "PC70DR"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE)
},
{
.matches = {
DMI_MATCH(DMI_BOARD_NAME, "PCX0DX"),
},
- .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
- SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE)
+ },
+ {
+ .matches = {
+ DMI_MATCH(DMI_BOARD_NAME, "PCX0DX_GN20"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE)
},
/* See comment on TUXEDO InfinityBook S17 Gen6 / Clevo NS70MU above */
{
@@ -1361,15 +1362,13 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
.matches = {
DMI_MATCH(DMI_BOARD_NAME, "X170SM"),
},
- .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
- SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE)
},
{
.matches = {
DMI_MATCH(DMI_BOARD_NAME, "X170KM-G"),
},
- .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
- SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+ .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE)
},
{
/*
diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c
index 066dc04003fa..67264c5b49cb 100644
--- a/drivers/input/touchscreen/ads7846.c
+++ b/drivers/input/touchscreen/ads7846.c
@@ -1021,7 +1021,7 @@ static int ads7846_setup_pendown(struct spi_device *spi,
if (pdata->get_pendown_state) {
ts->get_pendown_state = pdata->get_pendown_state;
} else {
- ts->gpio_pendown = gpiod_get(&spi->dev, "pendown", GPIOD_IN);
+ ts->gpio_pendown = devm_gpiod_get(&spi->dev, "pendown", GPIOD_IN);
if (IS_ERR(ts->gpio_pendown)) {
dev_err(&spi->dev, "failed to request pendown GPIO\n");
return PTR_ERR(ts->gpio_pendown);
diff --git a/drivers/input/touchscreen/goodix_berlin_core.c b/drivers/input/touchscreen/goodix_berlin_core.c
index 3fc03cf0ca23..7f8cfdd106fa 100644
--- a/drivers/input/touchscreen/goodix_berlin_core.c
+++ b/drivers/input/touchscreen/goodix_berlin_core.c
@@ -165,7 +165,7 @@ struct goodix_berlin_core {
struct device *dev;
struct regmap *regmap;
struct regulator *avdd;
- struct regulator *iovdd;
+ struct regulator *vddio;
struct gpio_desc *reset_gpio;
struct touchscreen_properties props;
struct goodix_berlin_fw_version fw_version;
@@ -248,22 +248,22 @@ static int goodix_berlin_power_on(struct goodix_berlin_core *cd)
{
int error;
- error = regulator_enable(cd->iovdd);
+ error = regulator_enable(cd->vddio);
if (error) {
- dev_err(cd->dev, "Failed to enable iovdd: %d\n", error);
+ dev_err(cd->dev, "Failed to enable vddio: %d\n", error);
return error;
}
- /* Vendor waits 3ms for IOVDD to settle */
+ /* Vendor waits 3ms for VDDIO to settle */
usleep_range(3000, 3100);
error = regulator_enable(cd->avdd);
if (error) {
dev_err(cd->dev, "Failed to enable avdd: %d\n", error);
- goto err_iovdd_disable;
+ goto err_vddio_disable;
}
- /* Vendor waits 15ms for IOVDD to settle */
+ /* Vendor waits 15ms for AVDD to settle */
usleep_range(15000, 15100);
gpiod_set_value_cansleep(cd->reset_gpio, 0);
@@ -283,8 +283,8 @@ static int goodix_berlin_power_on(struct goodix_berlin_core *cd)
err_dev_reset:
gpiod_set_value_cansleep(cd->reset_gpio, 1);
regulator_disable(cd->avdd);
-err_iovdd_disable:
- regulator_disable(cd->iovdd);
+err_vddio_disable:
+ regulator_disable(cd->vddio);
return error;
}
@@ -292,7 +292,7 @@ static void goodix_berlin_power_off(struct goodix_berlin_core *cd)
{
gpiod_set_value_cansleep(cd->reset_gpio, 1);
regulator_disable(cd->avdd);
- regulator_disable(cd->iovdd);
+ regulator_disable(cd->vddio);
}
static int goodix_berlin_read_version(struct goodix_berlin_core *cd)
@@ -744,10 +744,10 @@ int goodix_berlin_probe(struct device *dev, int irq, const struct input_id *id,
return dev_err_probe(dev, PTR_ERR(cd->avdd),
"Failed to request avdd regulator\n");
- cd->iovdd = devm_regulator_get(dev, "iovdd");
- if (IS_ERR(cd->iovdd))
- return dev_err_probe(dev, PTR_ERR(cd->iovdd),
- "Failed to request iovdd regulator\n");
+ cd->vddio = devm_regulator_get(dev, "vddio");
+ if (IS_ERR(cd->vddio))
+ return dev_err_probe(dev, PTR_ERR(cd->vddio),
+ "Failed to request vddio regulator\n");
error = goodix_berlin_power_on(cd);
if (error) {
diff --git a/drivers/input/touchscreen/imagis.c b/drivers/input/touchscreen/imagis.c
index abeae9102323..3c8bbe284b73 100644
--- a/drivers/input/touchscreen/imagis.c
+++ b/drivers/input/touchscreen/imagis.c
@@ -22,6 +22,7 @@
#define IST3032C_WHOAMI 0x32c
#define IST3038C_WHOAMI 0x38c
+#define IST3038H_WHOAMI 0x38d
#define IST3038B_REG_CHIPID 0x30
#define IST3038B_WHOAMI 0x30380b
@@ -428,11 +429,19 @@ static const struct imagis_properties imagis_3038c_data = {
.protocol_b = true,
};
+static const struct imagis_properties imagis_3038h_data = {
+ .interrupt_msg_cmd = IST3038C_REG_INTR_MESSAGE,
+ .touch_coord_cmd = IST3038C_REG_TOUCH_COORD,
+ .whoami_cmd = IST3038C_REG_CHIPID,
+ .whoami_val = IST3038H_WHOAMI,
+};
+
static const struct of_device_id imagis_of_match[] = {
{ .compatible = "imagis,ist3032c", .data = &imagis_3032c_data },
{ .compatible = "imagis,ist3038", .data = &imagis_3038_data },
{ .compatible = "imagis,ist3038b", .data = &imagis_3038b_data },
{ .compatible = "imagis,ist3038c", .data = &imagis_3038c_data },
+ { .compatible = "imagis,ist3038h", .data = &imagis_3038h_data },
{ },
};
MODULE_DEVICE_TABLE(of, imagis_of_match);
diff --git a/drivers/input/touchscreen/wdt87xx_i2c.c b/drivers/input/touchscreen/wdt87xx_i2c.c
index 27941245e962..88d376090e6e 100644
--- a/drivers/input/touchscreen/wdt87xx_i2c.c
+++ b/drivers/input/touchscreen/wdt87xx_i2c.c
@@ -1153,11 +1153,13 @@ static const struct i2c_device_id wdt87xx_dev_id[] = {
};
MODULE_DEVICE_TABLE(i2c, wdt87xx_dev_id);
+#ifdef CONFIG_ACPI
static const struct acpi_device_id wdt87xx_acpi_id[] = {
{ "WDHT0001", 0 },
{ }
};
MODULE_DEVICE_TABLE(acpi, wdt87xx_acpi_id);
+#endif
static struct i2c_driver wdt87xx_driver = {
.probe = wdt87xx_ts_probe,
diff --git a/drivers/leds/leds-st1202.c b/drivers/leds/leds-st1202.c
index b691c4886993..e894b3f9a0f4 100644
--- a/drivers/leds/leds-st1202.c
+++ b/drivers/leds/leds-st1202.c
@@ -261,8 +261,6 @@ static int st1202_dt_init(struct st1202_chip *chip)
int err, reg;
for_each_available_child_of_node_scoped(dev_of_node(dev), child) {
- struct led_init_data init_data = {};
-
err = of_property_read_u32(child, "reg", &reg);
if (err)
return dev_err_probe(dev, err, "Invalid register\n");
@@ -276,15 +274,6 @@ static int st1202_dt_init(struct st1202_chip *chip)
led->led_cdev.pattern_set = st1202_led_pattern_set;
led->led_cdev.pattern_clear = st1202_led_pattern_clear;
led->led_cdev.default_trigger = "pattern";
-
- init_data.fwnode = led->fwnode;
- init_data.devicename = "st1202";
- init_data.default_label = ":";
-
- err = devm_led_classdev_register_ext(dev, &led->led_cdev, &init_data);
- if (err < 0)
- return dev_err_probe(dev, err, "Failed to register LED class device\n");
-
led->led_cdev.brightness_set = st1202_brightness_set;
led->led_cdev.brightness_get = st1202_brightness_get;
}
@@ -368,6 +357,7 @@ static int st1202_probe(struct i2c_client *client)
return ret;
for (int i = 0; i < ST1202_MAX_LEDS; i++) {
+ struct led_init_data init_data = {};
led = &chip->leds[i];
led->chip = chip;
led->led_num = i;
@@ -384,6 +374,15 @@ static int st1202_probe(struct i2c_client *client)
if (ret < 0)
return dev_err_probe(&client->dev, ret,
"Failed to clear LED pattern\n");
+
+ init_data.fwnode = led->fwnode;
+ init_data.devicename = "st1202";
+ init_data.default_label = ":";
+
+ ret = devm_led_classdev_register_ext(&client->dev, &led->led_cdev, &init_data);
+ if (ret < 0)
+ return dev_err_probe(&client->dev, ret,
+ "Failed to register LED class device\n");
}
return 0;
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 731467d4ed10..b690905ab89f 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -426,7 +426,7 @@ static struct bio *clone_bio(struct dm_target *ti, struct flakey_c *fc, struct b
if (!clone)
return NULL;
- bio_init(clone, fc->dev->bdev, bio->bi_inline_vecs, nr_iovecs, bio->bi_opf);
+ bio_init(clone, fc->dev->bdev, clone->bi_inline_vecs, nr_iovecs, bio->bi_opf);
clone->bi_iter.bi_sector = flakey_map_sector(ti, bio->bi_iter.bi_sector);
clone->bi_private = bio;
diff --git a/drivers/media/dvb-frontends/rtl2832_sdr.c b/drivers/media/dvb-frontends/rtl2832_sdr.c
index 05254d8717db..0357624968f1 100644
--- a/drivers/media/dvb-frontends/rtl2832_sdr.c
+++ b/drivers/media/dvb-frontends/rtl2832_sdr.c
@@ -1363,6 +1363,7 @@ static int rtl2832_sdr_probe(struct platform_device *pdev)
dev->vb_queue.ops = &rtl2832_sdr_vb2_ops;
dev->vb_queue.mem_ops = &vb2_vmalloc_memops;
dev->vb_queue.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
+ dev->vb_queue.lock = &dev->vb_queue_lock;
ret = vb2_queue_init(&dev->vb_queue);
if (ret) {
dev_err(&pdev->dev, "Could not initialize vb2 queue\n");
@@ -1421,7 +1422,6 @@ static int rtl2832_sdr_probe(struct platform_device *pdev)
/* Init video_device structure */
dev->vdev = rtl2832_sdr_template;
dev->vdev.queue = &dev->vb_queue;
- dev->vdev.queue->lock = &dev->vb_queue_lock;
video_set_drvdata(&dev->vdev, dev);
/* Register the v4l2_device structure */
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index 327b6ecdc77e..d1b095af253b 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -1242,10 +1242,28 @@ static bool slave_can_set_ns_maddr(const struct bonding *bond, struct slave *sla
slave->dev->flags & IFF_MULTICAST;
}
+/**
+ * slave_set_ns_maddrs - add/del all NS mac addresses for slave
+ * @bond: bond device
+ * @slave: slave device
+ * @add: add or remove all the NS mac addresses
+ *
+ * This function tries to add or delete all the NS mac addresses on the slave
+ *
+ * Note, the IPv6 NS target address is the unicast address in Neighbor
+ * Solicitation (NS) message. The dest address of NS message should be
+ * solicited-node multicast address of the target. The dest mac of NS message
+ * is converted from the solicited-node multicast address.
+ *
+ * This function is called when
+ * * arp_validate changes
+ * * enslaving, releasing new slaves
+ */
static void slave_set_ns_maddrs(struct bonding *bond, struct slave *slave, bool add)
{
struct in6_addr *targets = bond->params.ns_targets;
char slot_maddr[MAX_ADDR_LEN];
+ struct in6_addr mcaddr;
int i;
if (!slave_can_set_ns_maddr(bond, slave))
@@ -1255,7 +1273,8 @@ static void slave_set_ns_maddrs(struct bonding *bond, struct slave *slave, bool
if (ipv6_addr_any(&targets[i]))
break;
- if (!ndisc_mc_map(&targets[i], slot_maddr, slave->dev, 0)) {
+ addrconf_addr_solict_mult(&targets[i], &mcaddr);
+ if (!ndisc_mc_map(&mcaddr, slot_maddr, slave->dev, 0)) {
if (add)
dev_mc_add(slave->dev, slot_maddr);
else
@@ -1278,23 +1297,43 @@ void bond_slave_ns_maddrs_del(struct bonding *bond, struct slave *slave)
slave_set_ns_maddrs(bond, slave, false);
}
+/**
+ * slave_set_ns_maddr - set new NS mac address for slave
+ * @bond: bond device
+ * @slave: slave device
+ * @target: the new IPv6 target
+ * @slot: the old IPv6 target in the slot
+ *
+ * This function tries to replace the old mac address to new one on the slave.
+ *
+ * Note, the target/slot IPv6 address is the unicast address in Neighbor
+ * Solicitation (NS) message. The dest address of NS message should be
+ * solicited-node multicast address of the target. The dest mac of NS message
+ * is converted from the solicited-node multicast address.
+ *
+ * This function is called when
+ * * An IPv6 NS target is added or removed.
+ */
static void slave_set_ns_maddr(struct bonding *bond, struct slave *slave,
struct in6_addr *target, struct in6_addr *slot)
{
- char target_maddr[MAX_ADDR_LEN], slot_maddr[MAX_ADDR_LEN];
+ char mac_addr[MAX_ADDR_LEN];
+ struct in6_addr mcast_addr;
if (!bond->params.arp_validate || !slave_can_set_ns_maddr(bond, slave))
return;
- /* remove the previous maddr from slave */
+ /* remove the previous mac addr from slave */
+ addrconf_addr_solict_mult(slot, &mcast_addr);
if (!ipv6_addr_any(slot) &&
- !ndisc_mc_map(slot, slot_maddr, slave->dev, 0))
- dev_mc_del(slave->dev, slot_maddr);
+ !ndisc_mc_map(&mcast_addr, mac_addr, slave->dev, 0))
+ dev_mc_del(slave->dev, mac_addr);
- /* add new maddr on slave if target is set */
+ /* add new mac addr on slave if target is set */
+ addrconf_addr_solict_mult(target, &mcast_addr);
if (!ipv6_addr_any(target) &&
- !ndisc_mc_map(target, target_maddr, slave->dev, 0))
- dev_mc_add(slave->dev, target_maddr);
+ !ndisc_mc_map(&mcast_addr, mac_addr, slave->dev, 0))
+ dev_mc_add(slave->dev, mac_addr);
}
static void _bond_options_ns_ip6_target_set(struct bonding *bond, int slot,
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 68d1e891752b..5db96ca52505 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -2208,13 +2208,11 @@ mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, int port,
return err;
}
-static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port,
- const unsigned char *addr, u16 vid,
- u8 state)
+static int mv88e6xxx_port_db_get(struct mv88e6xxx_chip *chip,
+ const unsigned char *addr, u16 vid,
+ u16 *fid, struct mv88e6xxx_atu_entry *entry)
{
- struct mv88e6xxx_atu_entry entry;
struct mv88e6xxx_vtu_entry vlan;
- u16 fid;
int err;
/* Ports have two private address databases: one for when the port is
@@ -2225,7 +2223,7 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port,
* VLAN ID into the port's database used for VLAN-unaware bridging.
*/
if (vid == 0) {
- fid = MV88E6XXX_FID_BRIDGED;
+ *fid = MV88E6XXX_FID_BRIDGED;
} else {
err = mv88e6xxx_vtu_get(chip, vid, &vlan);
if (err)
@@ -2235,14 +2233,39 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port,
if (!vlan.valid)
return -EOPNOTSUPP;
- fid = vlan.fid;
+ *fid = vlan.fid;
}
- entry.state = 0;
- ether_addr_copy(entry.mac, addr);
- eth_addr_dec(entry.mac);
+ entry->state = 0;
+ ether_addr_copy(entry->mac, addr);
+ eth_addr_dec(entry->mac);
+
+ return mv88e6xxx_g1_atu_getnext(chip, *fid, entry);
+}
+
+static bool mv88e6xxx_port_db_find(struct mv88e6xxx_chip *chip,
+ const unsigned char *addr, u16 vid)
+{
+ struct mv88e6xxx_atu_entry entry;
+ u16 fid;
+ int err;
- err = mv88e6xxx_g1_atu_getnext(chip, fid, &entry);
+ err = mv88e6xxx_port_db_get(chip, addr, vid, &fid, &entry);
+ if (err)
+ return false;
+
+ return entry.state && ether_addr_equal(entry.mac, addr);
+}
+
+static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port,
+ const unsigned char *addr, u16 vid,
+ u8 state)
+{
+ struct mv88e6xxx_atu_entry entry;
+ u16 fid;
+ int err;
+
+ err = mv88e6xxx_port_db_get(chip, addr, vid, &fid, &entry);
if (err)
return err;
@@ -2846,6 +2869,13 @@ static int mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port,
mv88e6xxx_reg_lock(chip);
err = mv88e6xxx_port_db_load_purge(chip, port, addr, vid,
MV88E6XXX_G1_ATU_DATA_STATE_UC_STATIC);
+ if (err)
+ goto out;
+
+ if (!mv88e6xxx_port_db_find(chip, addr, vid))
+ err = -ENOSPC;
+
+out:
mv88e6xxx_reg_unlock(chip);
return err;
@@ -6614,6 +6644,13 @@ static int mv88e6xxx_port_mdb_add(struct dsa_switch *ds, int port,
mv88e6xxx_reg_lock(chip);
err = mv88e6xxx_port_db_load_purge(chip, port, mdb->addr, mdb->vid,
MV88E6XXX_G1_ATU_DATA_STATE_MC_STATIC);
+ if (err)
+ goto out;
+
+ if (!mv88e6xxx_port_db_find(chip, mdb->addr, mdb->vid))
+ err = -ENOSPC;
+
+out:
mv88e6xxx_reg_unlock(chip);
return err;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 7b8b5b39c7bb..55f553debd3b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -2038,6 +2038,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
struct rx_cmp_ext *rxcmp1;
u32 tmp_raw_cons = *raw_cons;
u16 cons, prod, cp_cons = RING_CMP(tmp_raw_cons);
+ struct skb_shared_info *sinfo;
struct bnxt_sw_rx_bd *rx_buf;
unsigned int len;
u8 *data_ptr, agg_bufs, cmp_type;
@@ -2164,6 +2165,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
false);
if (!frag_len)
goto oom_next_rx;
+
}
xdp_active = true;
}
@@ -2173,6 +2175,12 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
rc = 1;
goto next_rx;
}
+ if (xdp_buff_has_frags(&xdp)) {
+ sinfo = xdp_get_shared_info_from_buff(&xdp);
+ agg_bufs = sinfo->nr_frags;
+ } else {
+ agg_bufs = 0;
+ }
}
if (len <= bp->rx_copybreak) {
@@ -2210,7 +2218,8 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
if (!skb)
goto oom_next_rx;
} else {
- skb = bnxt_xdp_build_skb(bp, skb, agg_bufs, rxr->page_pool, &xdp, rxcmp1);
+ skb = bnxt_xdp_build_skb(bp, skb, agg_bufs,
+ rxr->page_pool, &xdp);
if (!skb) {
/* we should be able to free the old skb here */
bnxt_xdp_buff_frags_free(rxr, &xdp);
@@ -15375,6 +15384,9 @@ static void bnxt_get_queue_stats_rx(struct net_device *dev, int i,
struct bnxt_cp_ring_info *cpr;
u64 *sw;
+ if (!bp->bnapi)
+ return;
+
cpr = &bp->bnapi[i]->cp_ring;
sw = cpr->stats.sw_stats;
@@ -15398,6 +15410,9 @@ static void bnxt_get_queue_stats_tx(struct net_device *dev, int i,
struct bnxt_napi *bnapi;
u64 *sw;
+ if (!bp->tx_ring)
+ return;
+
bnapi = bp->tx_ring[bp->tx_ring_map[i]].bnapi;
sw = bnapi->cp_ring.stats.sw_stats;
@@ -15439,6 +15454,9 @@ static int bnxt_queue_mem_alloc(struct net_device *dev, void *qmem, int idx)
struct bnxt_ring_struct *ring;
int rc;
+ if (!bp->rx_ring)
+ return -ENETDOWN;
+
rxr = &bp->rx_ring[idx];
clone = qmem;
memcpy(clone, rxr, sizeof(*rxr));
@@ -15521,6 +15539,7 @@ static void bnxt_queue_mem_free(struct net_device *dev, void *qmem)
struct bnxt_ring_struct *ring;
bnxt_free_one_rx_ring_skbs(bp, rxr);
+ bnxt_free_one_tpa_info(bp, rxr);
xdp_rxq_info_unreg(&rxr->xdp_rxq);
@@ -15632,7 +15651,7 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx)
cpr = &rxr->bnapi->cp_ring;
cpr->sw_stats->rx.rx_resets++;
- for (i = 0; i <= BNXT_VNIC_NTUPLE; i++) {
+ for (i = 0; i <= bp->nr_vnics; i++) {
vnic = &bp->vnic_info[i];
rc = bnxt_hwrm_vnic_set_rss_p5(bp, vnic, true);
@@ -15660,7 +15679,7 @@ static int bnxt_queue_stop(struct net_device *dev, void *qmem, int idx)
struct bnxt_vnic_info *vnic;
int i;
- for (i = 0; i <= BNXT_VNIC_NTUPLE; i++) {
+ for (i = 0; i <= bp->nr_vnics; i++) {
vnic = &bp->vnic_info[i];
vnic->mru = 0;
bnxt_hwrm_vnic_update(bp, vnic,
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index e6c64e4bd66c..299822cacca4 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -460,23 +460,16 @@ int bnxt_xdp(struct net_device *dev, struct netdev_bpf *xdp)
struct sk_buff *
bnxt_xdp_build_skb(struct bnxt *bp, struct sk_buff *skb, u8 num_frags,
- struct page_pool *pool, struct xdp_buff *xdp,
- struct rx_cmp_ext *rxcmp1)
+ struct page_pool *pool, struct xdp_buff *xdp)
{
struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
if (!skb)
return NULL;
- skb_checksum_none_assert(skb);
- if (RX_CMP_L4_CS_OK(rxcmp1)) {
- if (bp->dev->features & NETIF_F_RXCSUM) {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- skb->csum_level = RX_CMP_ENCAP(rxcmp1);
- }
- }
+
xdp_update_skb_shared_info(skb, num_frags,
sinfo->xdp_frags_size,
- BNXT_RX_PAGE_SIZE * sinfo->nr_frags,
+ BNXT_RX_PAGE_SIZE * num_frags,
xdp_buff_is_frag_pfmemalloc(xdp));
return skb;
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
index 0122782400b8..220285e190fc 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
@@ -33,6 +33,5 @@ void bnxt_xdp_buff_frags_free(struct bnxt_rx_ring_info *rxr,
struct xdp_buff *xdp);
struct sk_buff *bnxt_xdp_build_skb(struct bnxt *bp, struct sk_buff *skb,
u8 num_frags, struct page_pool *pool,
- struct xdp_buff *xdp,
- struct rx_cmp_ext *rxcmp1);
+ struct xdp_buff *xdp);
#endif
diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c
index 7cee365cc7d1..405ddd17de1b 100644
--- a/drivers/net/ethernet/intel/ice/ice_arfs.c
+++ b/drivers/net/ethernet/intel/ice/ice_arfs.c
@@ -511,7 +511,7 @@ void ice_init_arfs(struct ice_vsi *vsi)
struct hlist_head *arfs_fltr_list;
unsigned int i;
- if (!vsi || vsi->type != ICE_VSI_PF)
+ if (!vsi || vsi->type != ICE_VSI_PF || ice_is_arfs_active(vsi))
return;
arfs_fltr_list = kcalloc(ICE_MAX_ARFS_LIST, sizeof(*arfs_fltr_list),
diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c
index d649c197cf67..ed21d7f55ac1 100644
--- a/drivers/net/ethernet/intel/ice/ice_eswitch.c
+++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c
@@ -49,9 +49,6 @@ static int ice_eswitch_setup_env(struct ice_pf *pf)
if (vlan_ops->dis_rx_filtering(uplink_vsi))
goto err_vlan_filtering;
- if (ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_set_allow_override))
- goto err_override_uplink;
-
if (ice_vsi_update_local_lb(uplink_vsi, true))
goto err_override_local_lb;
@@ -63,8 +60,6 @@ static int ice_eswitch_setup_env(struct ice_pf *pf)
err_up:
ice_vsi_update_local_lb(uplink_vsi, false);
err_override_local_lb:
- ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override);
-err_override_uplink:
vlan_ops->ena_rx_filtering(uplink_vsi);
err_vlan_filtering:
ice_cfg_dflt_vsi(uplink_vsi->port_info, uplink_vsi->idx, false,
@@ -275,7 +270,6 @@ static void ice_eswitch_release_env(struct ice_pf *pf)
vlan_ops = ice_get_compat_vsi_vlan_ops(uplink_vsi);
ice_vsi_update_local_lb(uplink_vsi, false);
- ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override);
vlan_ops->ena_rx_filtering(uplink_vsi);
ice_cfg_dflt_vsi(uplink_vsi->port_info, uplink_vsi->idx, false,
ICE_FLTR_TX);
diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c
index 1ccb572ce285..22371011c249 100644
--- a/drivers/net/ethernet/intel/ice/ice_lag.c
+++ b/drivers/net/ethernet/intel/ice/ice_lag.c
@@ -1001,6 +1001,28 @@ static void ice_lag_link(struct ice_lag *lag)
}
/**
+ * ice_lag_config_eswitch - configure eswitch to work with LAG
+ * @lag: lag info struct
+ * @netdev: active network interface device struct
+ *
+ * Updates all port representors in eswitch to use @netdev for Tx.
+ *
+ * Configures the netdev to keep dst metadata (also used in representor Tx).
+ * This is required for an uplink without switchdev mode configured.
+ */
+static void ice_lag_config_eswitch(struct ice_lag *lag,
+ struct net_device *netdev)
+{
+ struct ice_repr *repr;
+ unsigned long id;
+
+ xa_for_each(&lag->pf->eswitch.reprs, id, repr)
+ repr->dst->u.port_info.lower_dev = netdev;
+
+ netif_keep_dst(netdev);
+}
+
+/**
* ice_lag_unlink - handle unlink event
* @lag: LAG info struct
*/
@@ -1021,6 +1043,9 @@ static void ice_lag_unlink(struct ice_lag *lag)
ice_lag_move_vf_nodes(lag, act_port, pri_port);
lag->primary = false;
lag->active_port = ICE_LAG_INVALID_PORT;
+
+ /* Config primary's eswitch back to normal operation. */
+ ice_lag_config_eswitch(lag, lag->netdev);
} else {
struct ice_lag *primary_lag;
@@ -1419,6 +1444,7 @@ static void ice_lag_monitor_active(struct ice_lag *lag, void *ptr)
ice_lag_move_vf_nodes(lag, prim_port,
event_port);
lag->active_port = event_port;
+ ice_lag_config_eswitch(lag, event_netdev);
return;
}
@@ -1428,6 +1454,7 @@ static void ice_lag_monitor_active(struct ice_lag *lag, void *ptr)
/* new active port */
ice_lag_move_vf_nodes(lag, lag->active_port, event_port);
lag->active_port = event_port;
+ ice_lag_config_eswitch(lag, event_netdev);
} else {
/* port not set as currently active (e.g. new active port
* has already claimed the nodes and filters
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 38a1c8372180..d0faa087793d 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -3937,24 +3937,6 @@ void ice_vsi_ctx_clear_antispoof(struct ice_vsi_ctx *ctx)
}
/**
- * ice_vsi_ctx_set_allow_override - allow destination override on VSI
- * @ctx: pointer to VSI ctx structure
- */
-void ice_vsi_ctx_set_allow_override(struct ice_vsi_ctx *ctx)
-{
- ctx->info.sec_flags |= ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD;
-}
-
-/**
- * ice_vsi_ctx_clear_allow_override - turn off destination override on VSI
- * @ctx: pointer to VSI ctx structure
- */
-void ice_vsi_ctx_clear_allow_override(struct ice_vsi_ctx *ctx)
-{
- ctx->info.sec_flags &= ~ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD;
-}
-
-/**
* ice_vsi_update_local_lb - update sw block in VSI with local loopback bit
* @vsi: pointer to VSI structure
* @set: set or unset the bit
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
index eabb35834a24..b4c9cb28a016 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -105,10 +105,6 @@ ice_vsi_update_security(struct ice_vsi *vsi, void (*fill)(struct ice_vsi_ctx *))
void ice_vsi_ctx_set_antispoof(struct ice_vsi_ctx *ctx);
void ice_vsi_ctx_clear_antispoof(struct ice_vsi_ctx *ctx);
-
-void ice_vsi_ctx_set_allow_override(struct ice_vsi_ctx *ctx);
-
-void ice_vsi_ctx_clear_allow_override(struct ice_vsi_ctx *ctx);
int ice_vsi_update_local_lb(struct ice_vsi *vsi, bool set);
int ice_vsi_add_vlan_zero(struct ice_vsi *vsi);
int ice_vsi_del_vlan_zero(struct ice_vsi *vsi);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index c3a0fb97c5ee..e13bd5a6cb6c 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -5065,16 +5065,16 @@ static int ice_init_devlink(struct ice_pf *pf)
return err;
ice_devlink_init_regions(pf);
- ice_health_init(pf);
ice_devlink_register(pf);
+ ice_health_init(pf);
return 0;
}
static void ice_deinit_devlink(struct ice_pf *pf)
{
- ice_devlink_unregister(pf);
ice_health_deinit(pf);
+ ice_devlink_unregister(pf);
ice_devlink_destroy_regions(pf);
ice_devlink_unregister_params(pf);
}
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 9c9ea4c1b93b..380ba1e8b3b2 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -2424,7 +2424,9 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring)
ICE_TXD_CTX_QW1_CMD_S);
ice_tstamp(tx_ring, skb, first, &offload);
- if (ice_is_switchdev_running(vsi->back) && vsi->type != ICE_VSI_SF)
+ if ((ice_is_switchdev_running(vsi->back) ||
+ ice_lag_is_switchdev_running(vsi->back)) &&
+ vsi->type != ICE_VSI_SF)
ice_eswitch_set_target_vsi(skb, &offload);
if (offload.cd_qw1 & ICE_TX_DESC_DTYPE_CTX) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index 98d4306929f3..a2cf3e79693d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -46,6 +46,9 @@ mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
u32 running_fw, stored_fw;
int err;
+ if (!mlx5_core_is_pf(dev))
+ return 0;
+
err = devlink_info_version_fixed_put(req, "fw.psid", dev->board_id);
if (err)
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
index 5d128c5b4529..0f5d7ea8956f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
@@ -48,15 +48,10 @@ mlx5_esw_bridge_lag_rep_get(struct net_device *dev, struct mlx5_eswitch *esw)
struct list_head *iter;
netdev_for_each_lower_dev(dev, lower, iter) {
- struct mlx5_core_dev *mdev;
- struct mlx5e_priv *priv;
-
if (!mlx5e_eswitch_rep(lower))
continue;
- priv = netdev_priv(lower);
- mdev = priv->mdev;
- if (mlx5_lag_is_shared_fdb(mdev) && mlx5_esw_bridge_dev_same_esw(lower, esw))
+ if (mlx5_esw_bridge_dev_same_esw(lower, esw))
return lower;
}
@@ -125,7 +120,7 @@ static bool mlx5_esw_bridge_is_local(struct net_device *dev, struct net_device *
priv = netdev_priv(rep);
mdev = priv->mdev;
if (netif_is_lag_master(dev))
- return mlx5_lag_is_shared_fdb(mdev) && mlx5_lag_is_master(mdev);
+ return mlx5_lag_is_master(mdev);
return true;
}
@@ -455,6 +450,9 @@ static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb,
if (!rep)
return NOTIFY_DONE;
+ if (netif_is_lag_master(dev) && !mlx5_lag_is_shared_fdb(esw->dev))
+ return NOTIFY_DONE;
+
switch (event) {
case SWITCHDEV_FDB_ADD_TO_BRIDGE:
fdb_info = container_of(info,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index a814b63ed97e..8fcaee381b0e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -5132,11 +5132,9 @@ static int mlx5e_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5_core_dev *mdev = priv->mdev;
u8 mode, setting;
- int err;
- err = mlx5_eswitch_get_vepa(mdev->priv.eswitch, &setting);
- if (err)
- return err;
+ if (mlx5_eswitch_get_vepa(mdev->priv.eswitch, &setting))
+ return -EOPNOTSUPP;
mode = setting ? BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB;
return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
mode,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 2b229b6226c6..dfb079e59d85 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -871,8 +871,8 @@ static void comp_irq_release_sf(struct mlx5_core_dev *dev, u16 vecidx)
static int comp_irq_request_sf(struct mlx5_core_dev *dev, u16 vecidx)
{
+ struct mlx5_irq_pool *pool = mlx5_irq_table_get_comp_irq_pool(dev);
struct mlx5_eq_table *table = dev->priv.eq_table;
- struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev);
struct irq_affinity_desc af_desc = {};
struct mlx5_irq *irq;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c
index 1477db7f5307..2691d88cdee1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c
@@ -175,7 +175,7 @@ unlock:
void mlx5_irq_affinity_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *irq)
{
- struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev);
+ struct mlx5_irq_pool *pool = mlx5_irq_get_pool(irq);
int cpu;
cpu = cpumask_first(mlx5_irq_get_affinity_mask(irq));
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index cea5aa314f6c..ed2ba272946b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -951,7 +951,7 @@ void mlx5_disable_lag(struct mlx5_lag *ldev)
mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
}
-static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
+bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev)
{
int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
struct mlx5_core_dev *dev;
@@ -1038,7 +1038,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
}
if (do_bond && !__mlx5_lag_is_active(ldev)) {
- bool shared_fdb = mlx5_shared_fdb_supported(ldev);
+ bool shared_fdb = mlx5_lag_shared_fdb_supported(ldev);
roce_lag = mlx5_lag_is_roce_lag(ldev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
index 01cf72366947..c2f256bb2bc2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
@@ -92,6 +92,7 @@ mlx5_lag_is_ready(struct mlx5_lag *ldev)
return test_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
}
+bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev);
bool mlx5_lag_check_prereq(struct mlx5_lag *ldev);
void mlx5_modify_lag(struct mlx5_lag *ldev,
struct lag_tracker *tracker);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
index ffac0bd6c895..1770297a112e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
@@ -83,7 +83,8 @@ static int enable_mpesw(struct mlx5_lag *ldev)
if (mlx5_eswitch_mode(dev0) != MLX5_ESWITCH_OFFLOADS ||
!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table) ||
!MLX5_CAP_GEN(dev0, create_lag_when_not_master_up) ||
- !mlx5_lag_check_prereq(ldev))
+ !mlx5_lag_check_prereq(ldev) ||
+ !mlx5_lag_shared_fdb_supported(ldev))
return -EOPNOTSUPP;
err = mlx5_mpesw_metadata_set(ldev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
index a80ecb672f33..711d14dea248 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
@@ -196,6 +196,11 @@ mlx5_chains_create_table(struct mlx5_fs_chains *chains,
ns = mlx5_get_flow_namespace(chains->dev, chains->ns);
}
+ if (!ns) {
+ mlx5_core_warn(chains->dev, "Failed to get flow namespace\n");
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
ft_attr.autogroup.num_reserved_entries = 2;
ft_attr.autogroup.max_num_groups = chains->group_num;
ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
index 0881e961d8b1..586688da9940 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
@@ -10,12 +10,15 @@
struct mlx5_irq;
struct cpu_rmap;
+struct mlx5_irq_pool;
int mlx5_irq_table_init(struct mlx5_core_dev *dev);
void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev);
int mlx5_irq_table_create(struct mlx5_core_dev *dev);
void mlx5_irq_table_destroy(struct mlx5_core_dev *dev);
void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev);
+struct mlx5_irq_pool *
+mlx5_irq_table_get_comp_irq_pool(struct mlx5_core_dev *dev);
int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table);
int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table);
struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev);
@@ -38,7 +41,6 @@ struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq);
int mlx5_irq_get_index(struct mlx5_irq *irq);
int mlx5_irq_get_irq(const struct mlx5_irq *irq);
-struct mlx5_irq_pool;
#ifdef CONFIG_MLX5_SF
struct mlx5_irq *mlx5_irq_affinity_irq_request_auto(struct mlx5_core_dev *dev,
struct cpumask *used_cpus, u16 vecidx);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index d9362eabc6a1..2c5f850c31f6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -378,6 +378,11 @@ int mlx5_irq_get_index(struct mlx5_irq *irq)
return irq->map.index;
}
+struct mlx5_irq_pool *mlx5_irq_get_pool(struct mlx5_irq *irq)
+{
+ return irq->pool;
+}
+
/* irq_pool API */
/* requesting an irq from a given pool according to given index */
@@ -405,18 +410,20 @@ static struct mlx5_irq_pool *sf_ctrl_irq_pool_get(struct mlx5_irq_table *irq_tab
return irq_table->sf_ctrl_pool;
}
-static struct mlx5_irq_pool *sf_irq_pool_get(struct mlx5_irq_table *irq_table)
+static struct mlx5_irq_pool *
+sf_comp_irq_pool_get(struct mlx5_irq_table *irq_table)
{
return irq_table->sf_comp_pool;
}
-struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev)
+struct mlx5_irq_pool *
+mlx5_irq_table_get_comp_irq_pool(struct mlx5_core_dev *dev)
{
struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
struct mlx5_irq_pool *pool = NULL;
if (mlx5_core_is_sf(dev))
- pool = sf_irq_pool_get(irq_table);
+ pool = sf_comp_irq_pool_get(irq_table);
/* In some configs, there won't be a pool of SFs IRQs. Hence, returning
* the PF IRQs pool in case the SF pool doesn't exist.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h
index c4d377f8df30..cc064425fe16 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h
@@ -28,7 +28,6 @@ struct mlx5_irq_pool {
struct mlx5_core_dev *dev;
};
-struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev);
static inline bool mlx5_irq_pool_is_sf_pool(struct mlx5_irq_pool *pool)
{
return !strncmp("mlx5_sf", pool->name, strlen("mlx5_sf"));
@@ -40,5 +39,6 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
int mlx5_irq_get_locked(struct mlx5_irq *irq);
int mlx5_irq_read_locked(struct mlx5_irq *irq);
int mlx5_irq_put(struct mlx5_irq *irq);
+struct mlx5_irq_pool *mlx5_irq_get_pool(struct mlx5_irq *irq);
#endif /* __PCI_IRQ_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h
index f9f569131dde..47f7ed141553 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h
@@ -24,8 +24,8 @@ struct mlx5hws_bwc_matcher {
struct mlx5hws_matcher *matcher;
struct mlx5hws_match_template *mt;
struct mlx5hws_action_template *at[MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM];
+ u32 priority;
u8 num_of_at;
- u16 priority;
u8 size_log;
atomic_t num_of_rules;
struct list_head *rules;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.h
index 5f409dc30aca..3d5afc832fa5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.h
@@ -210,6 +210,10 @@ struct mlx5dr_ste_ctx {
void (*set_encap_l3)(u8 *hw_ste_p, u8 *frst_s_action,
u8 *scnd_d_action, u32 reformat_id,
int size);
+ void (*set_insert_hdr)(u8 *hw_ste_p, u8 *d_action, u32 reformat_id,
+ u8 anchor, u8 offset, int size);
+ void (*set_remove_hdr)(u8 *hw_ste_p, u8 *s_action, u8 anchor,
+ u8 offset, int size);
/* Send */
void (*prepare_for_postsend)(u8 *hw_ste_p, u32 ste_size);
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.c
index 7f83d77c43ef..6447efbae00d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.c
@@ -266,10 +266,10 @@ void dr_ste_v1_set_encap(u8 *hw_ste_p, u8 *d_action, u32 reformat_id, int size)
dr_ste_v1_set_reparse(hw_ste_p);
}
-static void dr_ste_v1_set_insert_hdr(u8 *hw_ste_p, u8 *d_action,
- u32 reformat_id,
- u8 anchor, u8 offset,
- int size)
+void dr_ste_v1_set_insert_hdr(u8 *hw_ste_p, u8 *d_action,
+ u32 reformat_id,
+ u8 anchor, u8 offset,
+ int size)
{
MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action,
action_id, DR_STE_V1_ACTION_ID_INSERT_POINTER);
@@ -286,9 +286,9 @@ static void dr_ste_v1_set_insert_hdr(u8 *hw_ste_p, u8 *d_action,
dr_ste_v1_set_reparse(hw_ste_p);
}
-static void dr_ste_v1_set_remove_hdr(u8 *hw_ste_p, u8 *s_action,
- u8 anchor, u8 offset,
- int size)
+void dr_ste_v1_set_remove_hdr(u8 *hw_ste_p, u8 *s_action,
+ u8 anchor, u8 offset,
+ int size)
{
MLX5_SET(ste_single_action_remove_header_size_v1, s_action,
action_id, DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE);
@@ -584,11 +584,11 @@ void dr_ste_v1_set_actions_tx(struct mlx5dr_ste_ctx *ste_ctx,
action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
action_sz = DR_STE_ACTION_TRIPLE_SZ;
}
- dr_ste_v1_set_insert_hdr(last_ste, action,
- attr->reformat.id,
- attr->reformat.param_0,
- attr->reformat.param_1,
- attr->reformat.size);
+ ste_ctx->set_insert_hdr(last_ste, action,
+ attr->reformat.id,
+ attr->reformat.param_0,
+ attr->reformat.param_1,
+ attr->reformat.size);
action_sz -= DR_STE_ACTION_DOUBLE_SZ;
action += DR_STE_ACTION_DOUBLE_SZ;
} else if (action_type_set[DR_ACTION_TYP_REMOVE_HDR]) {
@@ -597,10 +597,10 @@ void dr_ste_v1_set_actions_tx(struct mlx5dr_ste_ctx *ste_ctx,
action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
action_sz = DR_STE_ACTION_TRIPLE_SZ;
}
- dr_ste_v1_set_remove_hdr(last_ste, action,
- attr->reformat.param_0,
- attr->reformat.param_1,
- attr->reformat.size);
+ ste_ctx->set_remove_hdr(last_ste, action,
+ attr->reformat.param_0,
+ attr->reformat.param_1,
+ attr->reformat.size);
action_sz -= DR_STE_ACTION_SINGLE_SZ;
action += DR_STE_ACTION_SINGLE_SZ;
}
@@ -792,11 +792,11 @@ void dr_ste_v1_set_actions_rx(struct mlx5dr_ste_ctx *ste_ctx,
action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
action_sz = DR_STE_ACTION_TRIPLE_SZ;
}
- dr_ste_v1_set_insert_hdr(last_ste, action,
- attr->reformat.id,
- attr->reformat.param_0,
- attr->reformat.param_1,
- attr->reformat.size);
+ ste_ctx->set_insert_hdr(last_ste, action,
+ attr->reformat.id,
+ attr->reformat.param_0,
+ attr->reformat.param_1,
+ attr->reformat.size);
action_sz -= DR_STE_ACTION_DOUBLE_SZ;
action += DR_STE_ACTION_DOUBLE_SZ;
allow_modify_hdr = false;
@@ -808,10 +808,10 @@ void dr_ste_v1_set_actions_rx(struct mlx5dr_ste_ctx *ste_ctx,
allow_modify_hdr = true;
allow_ctr = true;
}
- dr_ste_v1_set_remove_hdr(last_ste, action,
- attr->reformat.param_0,
- attr->reformat.param_1,
- attr->reformat.size);
+ ste_ctx->set_remove_hdr(last_ste, action,
+ attr->reformat.param_0,
+ attr->reformat.param_1,
+ attr->reformat.size);
action_sz -= DR_STE_ACTION_SINGLE_SZ;
action += DR_STE_ACTION_SINGLE_SZ;
}
@@ -2200,6 +2200,8 @@ static struct mlx5dr_ste_ctx ste_ctx_v1 = {
.set_pop_vlan = &dr_ste_v1_set_pop_vlan,
.set_rx_decap = &dr_ste_v1_set_rx_decap,
.set_encap_l3 = &dr_ste_v1_set_encap_l3,
+ .set_insert_hdr = &dr_ste_v1_set_insert_hdr,
+ .set_remove_hdr = &dr_ste_v1_set_remove_hdr,
/* Send */
.prepare_for_postsend = &dr_ste_v1_prepare_for_postsend,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.h
index a8d9e308d339..591c20c95a6a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.h
@@ -156,6 +156,10 @@ void dr_ste_v1_set_pop_vlan(u8 *hw_ste_p, u8 *s_action, u8 vlans_num);
void dr_ste_v1_set_encap_l3(u8 *hw_ste_p, u8 *frst_s_action, u8 *scnd_d_action,
u32 reformat_id, int size);
void dr_ste_v1_set_rx_decap(u8 *hw_ste_p, u8 *s_action);
+void dr_ste_v1_set_insert_hdr(u8 *hw_ste_p, u8 *d_action, u32 reformat_id,
+ u8 anchor, u8 offset, int size);
+void dr_ste_v1_set_remove_hdr(u8 *hw_ste_p, u8 *s_action, u8 anchor,
+ u8 offset, int size);
void dr_ste_v1_set_actions_tx(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_domain *dmn,
u8 *action_type_set, u32 actions_caps, u8 *last_ste,
struct mlx5dr_ste_actions_attr *attr, u32 *added_stes);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v2.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v2.c
index 0882dba0f64b..d0ebaf820d42 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v2.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v2.c
@@ -69,6 +69,8 @@ static struct mlx5dr_ste_ctx ste_ctx_v2 = {
.set_pop_vlan = &dr_ste_v1_set_pop_vlan,
.set_rx_decap = &dr_ste_v1_set_rx_decap,
.set_encap_l3 = &dr_ste_v1_set_encap_l3,
+ .set_insert_hdr = &dr_ste_v1_set_insert_hdr,
+ .set_remove_hdr = &dr_ste_v1_set_remove_hdr,
/* Send */
.prepare_for_postsend = &dr_ste_v1_prepare_for_postsend,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v3.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v3.c
index cc60ce1d274e..e468a9ae44e8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v3.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v3.c
@@ -79,6 +79,46 @@ static void dr_ste_v3_set_rx_decap(u8 *hw_ste_p, u8 *s_action)
dr_ste_v1_set_reparse(hw_ste_p);
}
+static void dr_ste_v3_set_insert_hdr(u8 *hw_ste_p, u8 *d_action,
+ u32 reformat_id, u8 anchor,
+ u8 offset, int size)
+{
+ MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action,
+ action_id, DR_STE_V1_ACTION_ID_INSERT_POINTER);
+ MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action,
+ start_anchor, anchor);
+
+ /* The hardware expects here size and offset in words (2 byte) */
+ MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action,
+ size, size / 2);
+ MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action,
+ start_offset, offset / 2);
+
+ MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action,
+ pointer, reformat_id);
+ MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action,
+ attributes, DR_STE_V1_ACTION_INSERT_PTR_ATTR_NONE);
+
+ dr_ste_v1_set_reparse(hw_ste_p);
+}
+
+static void dr_ste_v3_set_remove_hdr(u8 *hw_ste_p, u8 *s_action,
+ u8 anchor, u8 offset, int size)
+{
+ MLX5_SET(ste_single_action_remove_header_size_v3, s_action,
+ action_id, DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE);
+ MLX5_SET(ste_single_action_remove_header_size_v3, s_action,
+ start_anchor, anchor);
+
+ /* The hardware expects here size and offset in words (2 byte) */
+ MLX5_SET(ste_single_action_remove_header_size_v3, s_action,
+ remove_size, size / 2);
+ MLX5_SET(ste_single_action_remove_header_size_v3, s_action,
+ start_offset, offset / 2);
+
+ dr_ste_v1_set_reparse(hw_ste_p);
+}
+
static int
dr_ste_v3_set_action_decap_l3_list(void *data, u32 data_sz,
u8 *hw_action, u32 hw_action_sz,
@@ -211,6 +251,8 @@ static struct mlx5dr_ste_ctx ste_ctx_v3 = {
.set_pop_vlan = &dr_ste_v3_set_pop_vlan,
.set_rx_decap = &dr_ste_v3_set_rx_decap,
.set_encap_l3 = &dr_ste_v3_set_encap_l3,
+ .set_insert_hdr = &dr_ste_v3_set_insert_hdr,
+ .set_remove_hdr = &dr_ste_v3_set_remove_hdr,
/* Send */
.prepare_for_postsend = &dr_ste_v1_prepare_for_postsend,
};
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index be95336ce089..11457b6296cc 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -1547,6 +1547,7 @@ unmap_bar:
* adapter-MTU file and apc->mana_pci_debugfs folder.
*/
debugfs_remove_recursive(gc->mana_pci_debugfs);
+ gc->mana_pci_debugfs = NULL;
pci_iounmap(pdev, bar0_va);
free_gc:
pci_set_drvdata(pdev, NULL);
@@ -1569,6 +1570,8 @@ static void mana_gd_remove(struct pci_dev *pdev)
debugfs_remove_recursive(gc->mana_pci_debugfs);
+ gc->mana_pci_debugfs = NULL;
+
pci_iounmap(pdev, gc->bar0_va);
vfree(gc);
@@ -1622,6 +1625,8 @@ static void mana_gd_shutdown(struct pci_dev *pdev)
debugfs_remove_recursive(gc->mana_pci_debugfs);
+ gc->mana_pci_debugfs = NULL;
+
pci_disable_device(pdev);
}
@@ -1648,8 +1653,10 @@ static int __init mana_driver_init(void)
mana_debugfs_root = debugfs_create_dir("mana", NULL);
err = pci_register_driver(&mana_driver);
- if (err)
+ if (err) {
debugfs_remove(mana_debugfs_root);
+ mana_debugfs_root = NULL;
+ }
return err;
}
@@ -1659,6 +1666,8 @@ static void __exit mana_driver_exit(void)
pci_unregister_driver(&mana_driver);
debugfs_remove(mana_debugfs_root);
+
+ mana_debugfs_root = NULL;
}
module_init(mana_driver_init);
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index aa1e47233fe5..ae76ecc7a5d3 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -738,12 +738,11 @@ static const struct net_device_ops mana_devops = {
static void mana_cleanup_port_context(struct mana_port_context *apc)
{
/*
- * at this point all dir/files under the vport directory
- * are already cleaned up.
- * We are sure the apc->mana_port_debugfs remove will not
- * cause any freed memory access issues
+ * make sure subsequent cleanup attempts don't end up removing already
+ * cleaned dentry pointer
*/
debugfs_remove(apc->mana_port_debugfs);
+ apc->mana_port_debugfs = NULL;
kfree(apc->rxqs);
apc->rxqs = NULL;
}
@@ -1254,6 +1253,7 @@ static void mana_destroy_eq(struct mana_context *ac)
return;
debugfs_remove_recursive(ac->mana_eqs_debugfs);
+ ac->mana_eqs_debugfs = NULL;
for (i = 0; i < gc->max_num_queues; i++) {
eq = ac->eqs[i].eq;
@@ -1914,6 +1914,7 @@ static void mana_destroy_txq(struct mana_port_context *apc)
for (i = 0; i < apc->num_queues; i++) {
debugfs_remove_recursive(apc->tx_qp[i].mana_tx_debugfs);
+ apc->tx_qp[i].mana_tx_debugfs = NULL;
napi = &apc->tx_qp[i].tx_cq.napi;
if (apc->tx_qp[i].txq.napi_initialized) {
@@ -2099,6 +2100,7 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
return;
debugfs_remove_recursive(rxq->mana_rx_debugfs);
+ rxq->mana_rx_debugfs = NULL;
napi = &rxq->rx_cq.napi;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
index f9dd50152b1e..28d24d59efb8 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
@@ -454,8 +454,10 @@ static int qlcnic_sriov_set_guest_vlan_mode(struct qlcnic_adapter *adapter,
num_vlans = sriov->num_allowed_vlans;
sriov->allowed_vlans = kcalloc(num_vlans, sizeof(u16), GFP_KERNEL);
- if (!sriov->allowed_vlans)
+ if (!sriov->allowed_vlans) {
+ qlcnic_sriov_free_vlans(adapter);
return -ENOMEM;
+ }
vlans = (u16 *)&cmd->rsp.arg[3];
for (i = 0; i < num_vlans; i++)
@@ -2167,8 +2169,10 @@ int qlcnic_sriov_alloc_vlans(struct qlcnic_adapter *adapter)
vf = &sriov->vf_info[i];
vf->sriov_vlans = kcalloc(sriov->num_allowed_vlans,
sizeof(*vf->sriov_vlans), GFP_KERNEL);
- if (!vf->sriov_vlans)
+ if (!vf->sriov_vlans) {
+ qlcnic_sriov_free_vlans(adapter);
return -ENOMEM;
+ }
}
return 0;
diff --git a/drivers/net/ethernet/realtek/rtase/rtase_main.c b/drivers/net/ethernet/realtek/rtase/rtase_main.c
index 3bd11cb56294..2aacc1996796 100644
--- a/drivers/net/ethernet/realtek/rtase/rtase_main.c
+++ b/drivers/net/ethernet/realtek/rtase/rtase_main.c
@@ -1501,7 +1501,10 @@ static void rtase_wait_for_quiescence(const struct net_device *dev)
static void rtase_sw_reset(struct net_device *dev)
{
struct rtase_private *tp = netdev_priv(dev);
+ struct rtase_ring *ring, *tmp;
+ struct rtase_int_vector *ivec;
int ret;
+ u32 i;
netif_stop_queue(dev);
netif_carrier_off(dev);
@@ -1512,6 +1515,13 @@ static void rtase_sw_reset(struct net_device *dev)
rtase_tx_clear(tp);
rtase_rx_clear(tp);
+ for (i = 0; i < tp->int_nums; i++) {
+ ivec = &tp->int_vector[i];
+ list_for_each_entry_safe(ring, tmp, &ivec->ring_list,
+ ring_entry)
+ list_del(&ring->ring_entry);
+ }
+
ret = rtase_init_ring(dev);
if (ret) {
netdev_err(dev, "unable to init ring\n");
diff --git a/drivers/net/mctp/mctp-i2c.c b/drivers/net/mctp/mctp-i2c.c
index e3dcdeacc12c..d74d47dd6e04 100644
--- a/drivers/net/mctp/mctp-i2c.c
+++ b/drivers/net/mctp/mctp-i2c.c
@@ -583,6 +583,7 @@ static int mctp_i2c_header_create(struct sk_buff *skb, struct net_device *dev,
struct mctp_i2c_hdr *hdr;
struct mctp_hdr *mhdr;
u8 lldst, llsrc;
+ int rc;
if (len > MCTP_I2C_MAXMTU)
return -EMSGSIZE;
@@ -593,6 +594,10 @@ static int mctp_i2c_header_create(struct sk_buff *skb, struct net_device *dev,
lldst = *((u8 *)daddr);
llsrc = *((u8 *)saddr);
+ rc = skb_cow_head(skb, sizeof(struct mctp_i2c_hdr));
+ if (rc)
+ return rc;
+
skb_push(skb, sizeof(struct mctp_i2c_hdr));
skb_reset_mac_header(skb);
hdr = (void *)skb_mac_header(skb);
diff --git a/drivers/net/mctp/mctp-i3c.c b/drivers/net/mctp/mctp-i3c.c
index c1e72253063b..c678f79aa356 100644
--- a/drivers/net/mctp/mctp-i3c.c
+++ b/drivers/net/mctp/mctp-i3c.c
@@ -506,10 +506,15 @@ static int mctp_i3c_header_create(struct sk_buff *skb, struct net_device *dev,
const void *saddr, unsigned int len)
{
struct mctp_i3c_internal_hdr *ihdr;
+ int rc;
if (!daddr || !saddr)
return -EINVAL;
+ rc = skb_cow_head(skb, sizeof(struct mctp_i3c_internal_hdr));
+ if (rc)
+ return rc;
+
skb_push(skb, sizeof(struct mctp_i3c_internal_hdr));
skb_reset_mac_header(skb);
ihdr = (void *)skb_mac_header(skb);
diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c
index 34231b5b9175..e9fc54517449 100644
--- a/drivers/net/phy/nxp-c45-tja11xx.c
+++ b/drivers/net/phy/nxp-c45-tja11xx.c
@@ -22,6 +22,11 @@
#define PHY_ID_TJA_1103 0x001BB010
#define PHY_ID_TJA_1120 0x001BB031
+#define VEND1_DEVICE_ID3 0x0004
+#define TJA1120_DEV_ID3_SILICON_VERSION GENMASK(15, 12)
+#define TJA1120_DEV_ID3_SAMPLE_TYPE GENMASK(11, 8)
+#define DEVICE_ID3_SAMPLE_TYPE_R 0x9
+
#define VEND1_DEVICE_CONTROL 0x0040
#define DEVICE_CONTROL_RESET BIT(15)
#define DEVICE_CONTROL_CONFIG_GLOBAL_EN BIT(14)
@@ -109,6 +114,9 @@
#define MII_BASIC_CONFIG_RMII 0x5
#define MII_BASIC_CONFIG_MII 0x4
+#define VEND1_SGMII_BASIC_CONTROL 0xB000
+#define SGMII_LPM BIT(11)
+
#define VEND1_SYMBOL_ERROR_CNT_XTD 0x8351
#define EXTENDED_CNT_EN BIT(15)
#define VEND1_MONITOR_STATUS 0xAC80
@@ -1593,6 +1601,63 @@ static int nxp_c45_set_phy_mode(struct phy_device *phydev)
return 0;
}
+/* Errata: ES_TJA1120 and ES_TJA1121 Rev. 1.0 — 28 November 2024 Section 3.1 & 3.2 */
+static void nxp_c45_tja1120_errata(struct phy_device *phydev)
+{
+ bool macsec_ability, sgmii_ability;
+ int silicon_version, sample_type;
+ int phy_abilities;
+ int ret = 0;
+
+ ret = phy_read_mmd(phydev, MDIO_MMD_VEND1, VEND1_DEVICE_ID3);
+ if (ret < 0)
+ return;
+
+ sample_type = FIELD_GET(TJA1120_DEV_ID3_SAMPLE_TYPE, ret);
+ if (sample_type != DEVICE_ID3_SAMPLE_TYPE_R)
+ return;
+
+ silicon_version = FIELD_GET(TJA1120_DEV_ID3_SILICON_VERSION, ret);
+
+ phy_abilities = phy_read_mmd(phydev, MDIO_MMD_VEND1,
+ VEND1_PORT_ABILITIES);
+ macsec_ability = !!(phy_abilities & MACSEC_ABILITY);
+ sgmii_ability = !!(phy_abilities & SGMII_ABILITY);
+ if ((!macsec_ability && silicon_version == 2) ||
+ (macsec_ability && silicon_version == 1)) {
+ /* TJA1120/TJA1121 PHY configuration errata workaround.
+ * Apply PHY writes sequence before link up.
+ */
+ if (!macsec_ability) {
+ phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F8, 0x4b95);
+ phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F9, 0xf3cd);
+ } else {
+ phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F8, 0x89c7);
+ phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F9, 0x0893);
+ }
+
+ phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x0476, 0x58a0);
+
+ phy_write_mmd(phydev, MDIO_MMD_PMAPMD, 0x8921, 0xa3a);
+ phy_write_mmd(phydev, MDIO_MMD_PMAPMD, 0x89F1, 0x16c1);
+
+ phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F8, 0x0);
+ phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F9, 0x0);
+
+ if (sgmii_ability) {
+ /* TJA1120B/TJA1121B SGMII PCS restart errata workaround.
+ * Put SGMII PCS into power down mode and back up.
+ */
+ phy_set_bits_mmd(phydev, MDIO_MMD_VEND1,
+ VEND1_SGMII_BASIC_CONTROL,
+ SGMII_LPM);
+ phy_clear_bits_mmd(phydev, MDIO_MMD_VEND1,
+ VEND1_SGMII_BASIC_CONTROL,
+ SGMII_LPM);
+ }
+ }
+}
+
static int nxp_c45_config_init(struct phy_device *phydev)
{
int ret;
@@ -1609,6 +1674,9 @@ static int nxp_c45_config_init(struct phy_device *phydev)
phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F8, 1);
phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F9, 2);
+ if (phy_id_compare(phydev->phy_id, PHY_ID_TJA_1120, GENMASK(31, 4)))
+ nxp_c45_tja1120_errata(phydev);
+
phy_set_bits_mmd(phydev, MDIO_MMD_VEND1, VEND1_PHY_CONFIG,
PHY_CONFIG_AUTO);
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index a91bf9c7e31d..137adf6d5b08 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -627,7 +627,7 @@ static int lan78xx_read_reg(struct lan78xx_net *dev, u32 index, u32 *data)
kfree(buf);
- return ret;
+ return ret < 0 ? ret : 0;
}
static int lan78xx_write_reg(struct lan78xx_net *dev, u32 index, u32 data)
@@ -658,7 +658,7 @@ static int lan78xx_write_reg(struct lan78xx_net *dev, u32 index, u32 data)
kfree(buf);
- return ret;
+ return ret < 0 ? ret : 0;
}
static int lan78xx_update_reg(struct lan78xx_net *dev, u32 reg, u32 mask,
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c
index 49c8507d1a6b..47854a36413e 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c
@@ -403,6 +403,8 @@ void iwl_trans_op_mode_leave(struct iwl_trans *trans)
iwl_trans_pcie_op_mode_leave(trans);
+ cancel_work_sync(&trans->restart.wk);
+
trans->op_mode = NULL;
trans->state = IWL_TRANS_NO_FW;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index df49dd2e2026..d10877856049 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (C) 2012-2014, 2018-2024 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2025 Intel Corporation
* Copyright (C) 2013-2015 Intel Mobile Communications GmbH
* Copyright (C) 2016-2017 Intel Deutschland GmbH
*/
@@ -422,6 +422,8 @@ static int iwl_mvm_load_ucode_wait_alive(struct iwl_mvm *mvm,
/* if reached this point, Alive notification was received */
iwl_mei_alive_notif(true);
+ iwl_trans_fw_alive(mvm->trans, alive_data.scd_base_addr);
+
ret = iwl_pnvm_load(mvm->trans, &mvm->notif_wait,
&mvm->fw->ucode_capa);
if (ret) {
@@ -430,8 +432,6 @@ static int iwl_mvm_load_ucode_wait_alive(struct iwl_mvm *mvm,
return ret;
}
- iwl_trans_fw_alive(mvm->trans, alive_data.scd_base_addr);
-
/*
* Note: all the queues are enabled as part of the interface
* initialization, but in firmware restart scenarios they
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
index 7b6071a59b69..7c1dd5cc084a 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
@@ -1869,12 +1869,12 @@ struct sg_table *iwl_pcie_prep_tso(struct iwl_trans *trans, struct sk_buff *skb,
unsigned int offset)
{
struct sg_table *sgt;
- unsigned int n_segments;
+ unsigned int n_segments = skb_shinfo(skb)->nr_frags + 1;
+ int orig_nents;
if (WARN_ON_ONCE(skb_has_frag_list(skb)))
return NULL;
- n_segments = DIV_ROUND_UP(skb->len - offset, skb_shinfo(skb)->gso_size);
*hdr = iwl_pcie_get_page_hdr(trans,
hdr_room + __alignof__(struct sg_table) +
sizeof(struct sg_table) +
@@ -1889,11 +1889,12 @@ struct sg_table *iwl_pcie_prep_tso(struct iwl_trans *trans, struct sk_buff *skb,
sg_init_table(sgt->sgl, n_segments);
/* Only map the data, not the header (it is copied to the TSO page) */
- sgt->orig_nents = skb_to_sgvec(skb, sgt->sgl, offset,
- skb->len - offset);
- if (WARN_ON_ONCE(sgt->orig_nents <= 0))
+ orig_nents = skb_to_sgvec(skb, sgt->sgl, offset, skb->len - offset);
+ if (WARN_ON_ONCE(orig_nents <= 0))
return NULL;
+ sgt->orig_nents = orig_nents;
+
/* And map the entire SKB */
if (dma_map_sgtable(trans->dev, sgt, DMA_TO_DEVICE, 0) < 0)
return NULL;
diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c
index a060f69558e7..8971aca41e63 100644
--- a/drivers/nvme/host/apple.c
+++ b/drivers/nvme/host/apple.c
@@ -599,7 +599,8 @@ static inline void apple_nvme_handle_cqe(struct apple_nvme_queue *q,
}
if (!nvme_try_complete_req(req, cqe->status, cqe->result) &&
- !blk_mq_add_to_batch(req, iob, nvme_req(req)->status,
+ !blk_mq_add_to_batch(req, iob,
+ nvme_req(req)->status != NVME_SC_SUCCESS,
apple_nvme_complete_batch))
apple_nvme_complete_rq(req);
}
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index f028913e2e62..8359d0aa0e44 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -431,6 +431,12 @@ static inline void nvme_end_req_zoned(struct request *req)
static inline void __nvme_end_req(struct request *req)
{
+ if (unlikely(nvme_req(req)->status && !(req->rq_flags & RQF_QUIET))) {
+ if (blk_rq_is_passthrough(req))
+ nvme_log_err_passthru(req);
+ else
+ nvme_log_error(req);
+ }
nvme_end_req_zoned(req);
nvme_trace_bio_complete(req);
if (req->cmd_flags & REQ_NVME_MPATH)
@@ -441,12 +447,6 @@ void nvme_end_req(struct request *req)
{
blk_status_t status = nvme_error_status(nvme_req(req)->status);
- if (unlikely(nvme_req(req)->status && !(req->rq_flags & RQF_QUIET))) {
- if (blk_rq_is_passthrough(req))
- nvme_log_err_passthru(req);
- else
- nvme_log_error(req);
- }
__nvme_end_req(req);
blk_mq_end_request(req, status);
}
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 640590b21728..3ad7f197c808 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1130,8 +1130,9 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail);
if (!nvme_try_complete_req(req, cqe->status, cqe->result) &&
- !blk_mq_add_to_batch(req, iob, nvme_req(req)->status,
- nvme_pci_complete_batch))
+ !blk_mq_add_to_batch(req, iob,
+ nvme_req(req)->status != NVME_SC_SUCCESS,
+ nvme_pci_complete_batch))
nvme_pci_complete_rq(req);
}
@@ -1411,9 +1412,20 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
struct nvme_dev *dev = nvmeq->dev;
struct request *abort_req;
struct nvme_command cmd = { };
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
u32 csts = readl(dev->bar + NVME_REG_CSTS);
u8 opcode;
+ /*
+ * Shutdown the device immediately if we see it is disconnected. This
+ * unblocks PCIe error handling if the nvme driver is waiting in
+ * error_resume for a device that has been removed. We can't unbind the
+ * driver while the driver's error callback is waiting to complete, so
+ * we're relying on a timeout to break that deadlock if a removal
+ * occurs while reset work is running.
+ */
+ if (pci_dev_is_disconnected(pdev))
+ nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
if (nvme_state_terminal(&dev->ctrl))
goto disable;
@@ -1421,7 +1433,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
* the recovery mechanism will surely fail.
*/
mb();
- if (pci_channel_offline(to_pci_dev(dev->dev)))
+ if (pci_channel_offline(pdev))
return BLK_EH_RESET_TIMER;
/*
diff --git a/drivers/nvme/target/pci-epf.c b/drivers/nvme/target/pci-epf.c
index 565d2bd36dcd..b1e31483f157 100644
--- a/drivers/nvme/target/pci-epf.c
+++ b/drivers/nvme/target/pci-epf.c
@@ -1265,15 +1265,12 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl,
struct nvmet_pci_epf_queue *cq = &ctrl->cq[cqid];
u16 status;
- if (test_and_set_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags))
+ if (test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags))
return NVME_SC_QID_INVALID | NVME_STATUS_DNR;
if (!(flags & NVME_QUEUE_PHYS_CONTIG))
return NVME_SC_INVALID_QUEUE | NVME_STATUS_DNR;
- if (flags & NVME_CQ_IRQ_ENABLED)
- set_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags);
-
cq->pci_addr = pci_addr;
cq->qid = cqid;
cq->depth = qsize + 1;
@@ -1290,24 +1287,27 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl,
cq->qes = ctrl->io_cqes;
cq->pci_size = cq->qes * cq->depth;
- cq->iv = nvmet_pci_epf_add_irq_vector(ctrl, vector);
- if (!cq->iv) {
- status = NVME_SC_INTERNAL | NVME_STATUS_DNR;
- goto err;
+ if (flags & NVME_CQ_IRQ_ENABLED) {
+ cq->iv = nvmet_pci_epf_add_irq_vector(ctrl, vector);
+ if (!cq->iv)
+ return NVME_SC_INTERNAL | NVME_STATUS_DNR;
+ set_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags);
}
status = nvmet_cq_create(tctrl, &cq->nvme_cq, cqid, cq->depth);
if (status != NVME_SC_SUCCESS)
goto err;
+ set_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags);
+
dev_dbg(ctrl->dev, "CQ[%u]: %u entries of %zu B, IRQ vector %u\n",
cqid, qsize, cq->qes, cq->vector);
return NVME_SC_SUCCESS;
err:
- clear_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags);
- clear_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags);
+ if (test_and_clear_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags))
+ nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector);
return status;
}
@@ -1333,7 +1333,7 @@ static u16 nvmet_pci_epf_create_sq(struct nvmet_ctrl *tctrl,
struct nvmet_pci_epf_queue *sq = &ctrl->sq[sqid];
u16 status;
- if (test_and_set_bit(NVMET_PCI_EPF_Q_LIVE, &sq->flags))
+ if (test_bit(NVMET_PCI_EPF_Q_LIVE, &sq->flags))
return NVME_SC_QID_INVALID | NVME_STATUS_DNR;
if (!(flags & NVME_QUEUE_PHYS_CONTIG))
@@ -1355,7 +1355,7 @@ static u16 nvmet_pci_epf_create_sq(struct nvmet_ctrl *tctrl,
status = nvmet_sq_create(tctrl, &sq->nvme_sq, sqid, sq->depth);
if (status != NVME_SC_SUCCESS)
- goto out_clear_bit;
+ return status;
sq->iod_wq = alloc_workqueue("sq%d_wq", WQ_UNBOUND,
min_t(int, sq->depth, WQ_MAX_ACTIVE), sqid);
@@ -1365,6 +1365,8 @@ static u16 nvmet_pci_epf_create_sq(struct nvmet_ctrl *tctrl,
goto out_destroy_sq;
}
+ set_bit(NVMET_PCI_EPF_Q_LIVE, &sq->flags);
+
dev_dbg(ctrl->dev, "SQ[%u]: %u entries of %zu B\n",
sqid, qsize, sq->qes);
@@ -1372,8 +1374,6 @@ static u16 nvmet_pci_epf_create_sq(struct nvmet_ctrl *tctrl,
out_destroy_sq:
nvmet_sq_destroy(&sq->nvme_sq);
-out_clear_bit:
- clear_bit(NVMET_PCI_EPF_Q_LIVE, &sq->flags);
return status;
}
diff --git a/drivers/pinctrl/bcm/pinctrl-bcm281xx.c b/drivers/pinctrl/bcm/pinctrl-bcm281xx.c
index 73dbf29c002f..cf6efa9c0364 100644
--- a/drivers/pinctrl/bcm/pinctrl-bcm281xx.c
+++ b/drivers/pinctrl/bcm/pinctrl-bcm281xx.c
@@ -974,7 +974,7 @@ static const struct regmap_config bcm281xx_pinctrl_regmap_config = {
.reg_bits = 32,
.reg_stride = 4,
.val_bits = 32,
- .max_register = BCM281XX_PIN_VC_CAM3_SDA,
+ .max_register = BCM281XX_PIN_VC_CAM3_SDA * 4,
};
static int bcm281xx_pinctrl_get_groups_count(struct pinctrl_dev *pctldev)
diff --git a/drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c b/drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c
index 471f644c5eef..d09a5e9b2eca 100644
--- a/drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c
+++ b/drivers/pinctrl/nuvoton/pinctrl-npcm8xx.c
@@ -2374,6 +2374,9 @@ static int npcm8xx_gpio_fw(struct npcm8xx_pinctrl *pctrl)
pctrl->gpio_bank[id].gc.parent = dev;
pctrl->gpio_bank[id].gc.fwnode = child;
pctrl->gpio_bank[id].gc.label = devm_kasprintf(dev, GFP_KERNEL, "%pfw", child);
+ if (pctrl->gpio_bank[id].gc.label == NULL)
+ return -ENOMEM;
+
pctrl->gpio_bank[id].gc.dbg_show = npcmgpio_dbg_show;
pctrl->gpio_bank[id].direction_input = pctrl->gpio_bank[id].gc.direction_input;
pctrl->gpio_bank[id].gc.direction_input = npcmgpio_direction_input;
diff --git a/drivers/pinctrl/spacemit/Kconfig b/drivers/pinctrl/spacemit/Kconfig
index 168f8a5ffbb9..a2f98b3f8a75 100644
--- a/drivers/pinctrl/spacemit/Kconfig
+++ b/drivers/pinctrl/spacemit/Kconfig
@@ -4,9 +4,10 @@
#
config PINCTRL_SPACEMIT_K1
- tristate "SpacemiT K1 SoC Pinctrl driver"
+ bool "SpacemiT K1 SoC Pinctrl driver"
depends on ARCH_SPACEMIT || COMPILE_TEST
depends on OF
+ default y
select GENERIC_PINCTRL_GROUPS
select GENERIC_PINMUX_FUNCTIONS
select GENERIC_PINCONF
diff --git a/drivers/pinctrl/spacemit/pinctrl-k1.c b/drivers/pinctrl/spacemit/pinctrl-k1.c
index a32579d73613..59fd555ff38d 100644
--- a/drivers/pinctrl/spacemit/pinctrl-k1.c
+++ b/drivers/pinctrl/spacemit/pinctrl-k1.c
@@ -1044,7 +1044,7 @@ static struct platform_driver k1_pinctrl_driver = {
.of_match_table = k1_pinctrl_ids,
},
};
-module_platform_driver(k1_pinctrl_driver);
+builtin_platform_driver(k1_pinctrl_driver);
MODULE_AUTHOR("Yixun Lan <dlan@gentoo.org>");
MODULE_DESCRIPTION("Pinctrl driver for the SpacemiT K1 SoC");
diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c
index d4f32ad66530..a594d5fcfcfd 100644
--- a/drivers/platform/surface/surface_aggregator_registry.c
+++ b/drivers/platform/surface/surface_aggregator_registry.c
@@ -371,7 +371,7 @@ static const struct software_node *ssam_node_group_sp8[] = {
NULL,
};
-/* Devices for Surface Pro 9 (Intel/x86) and 10 */
+/* Devices for Surface Pro 9, 10 and 11 (Intel/x86) */
static const struct software_node *ssam_node_group_sp9[] = {
&ssam_node_root,
&ssam_node_hub_kip,
@@ -430,6 +430,9 @@ static const struct acpi_device_id ssam_platform_hub_acpi_match[] = {
/* Surface Pro 10 */
{ "MSHW0510", (unsigned long)ssam_node_group_sp9 },
+ /* Surface Pro 11 */
+ { "MSHW0583", (unsigned long)ssam_node_group_sp9 },
+
/* Surface Book 2 */
{ "MSHW0107", (unsigned long)ssam_node_group_gen5 },
diff --git a/drivers/platform/x86/amd/pmf/spc.c b/drivers/platform/x86/amd/pmf/spc.c
index f34f3130c330..1d90f9382024 100644
--- a/drivers/platform/x86/amd/pmf/spc.c
+++ b/drivers/platform/x86/amd/pmf/spc.c
@@ -219,12 +219,14 @@ static int amd_pmf_get_slider_info(struct amd_pmf_dev *dev, struct ta_pmf_enact_
switch (dev->current_profile) {
case PLATFORM_PROFILE_PERFORMANCE:
+ case PLATFORM_PROFILE_BALANCED_PERFORMANCE:
val = TA_BEST_PERFORMANCE;
break;
case PLATFORM_PROFILE_BALANCED:
val = TA_BETTER_PERFORMANCE;
break;
case PLATFORM_PROFILE_LOW_POWER:
+ case PLATFORM_PROFILE_QUIET:
val = TA_BEST_BATTERY;
break;
default:
diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c
index ceaff1ebb7b9..a1e43873a07b 100644
--- a/drivers/platform/x86/amd/pmf/tee-if.c
+++ b/drivers/platform/x86/amd/pmf/tee-if.c
@@ -510,18 +510,18 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev)
ret = amd_pmf_set_dram_addr(dev, true);
if (ret)
- goto error;
+ goto err_cancel_work;
dev->policy_base = devm_ioremap_resource(dev->dev, dev->res);
if (IS_ERR(dev->policy_base)) {
ret = PTR_ERR(dev->policy_base);
- goto error;
+ goto err_free_dram_buf;
}
dev->policy_buf = kzalloc(dev->policy_sz, GFP_KERNEL);
if (!dev->policy_buf) {
ret = -ENOMEM;
- goto error;
+ goto err_free_dram_buf;
}
memcpy_fromio(dev->policy_buf, dev->policy_base, dev->policy_sz);
@@ -531,13 +531,13 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev)
dev->prev_data = kzalloc(sizeof(*dev->prev_data), GFP_KERNEL);
if (!dev->prev_data) {
ret = -ENOMEM;
- goto error;
+ goto err_free_policy;
}
for (i = 0; i < ARRAY_SIZE(amd_pmf_ta_uuid); i++) {
ret = amd_pmf_tee_init(dev, &amd_pmf_ta_uuid[i]);
if (ret)
- return ret;
+ goto err_free_prev_data;
ret = amd_pmf_start_policy_engine(dev);
switch (ret) {
@@ -550,27 +550,41 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev)
status = false;
break;
default:
- goto error;
+ ret = -EINVAL;
+ amd_pmf_tee_deinit(dev);
+ goto err_free_prev_data;
}
if (status)
break;
}
- if (!status && !pb_side_load)
- goto error;
+ if (!status && !pb_side_load) {
+ ret = -EINVAL;
+ goto err_free_prev_data;
+ }
if (pb_side_load)
amd_pmf_open_pb(dev, dev->dbgfs_dir);
ret = amd_pmf_register_input_device(dev);
if (ret)
- goto error;
+ goto err_pmf_remove_pb;
return 0;
-error:
- amd_pmf_deinit_smart_pc(dev);
+err_pmf_remove_pb:
+ if (pb_side_load && dev->esbin)
+ amd_pmf_remove_pb(dev);
+ amd_pmf_tee_deinit(dev);
+err_free_prev_data:
+ kfree(dev->prev_data);
+err_free_policy:
+ kfree(dev->policy_buf);
+err_free_dram_buf:
+ kfree(dev->buf);
+err_cancel_work:
+ cancel_delayed_work_sync(&dev->pb_work);
return ret;
}
diff --git a/drivers/spi/atmel-quadspi.c b/drivers/spi/atmel-quadspi.c
index d8c9be64d006..244ac0106862 100644
--- a/drivers/spi/atmel-quadspi.c
+++ b/drivers/spi/atmel-quadspi.c
@@ -930,11 +930,8 @@ static int atmel_qspi_sama7g5_transfer(struct spi_mem *mem,
/* Release the chip-select. */
ret = atmel_qspi_reg_sync(aq);
- if (ret) {
- pm_runtime_mark_last_busy(&aq->pdev->dev);
- pm_runtime_put_autosuspend(&aq->pdev->dev);
+ if (ret)
return ret;
- }
atmel_qspi_write(QSPI_CR_LASTXFER, aq, QSPI_CR);
return atmel_qspi_wait_for_completion(aq, QSPI_SR_CSRA);
diff --git a/drivers/spi/spi-microchip-core.c b/drivers/spi/spi-microchip-core.c
index 5b6af55855ef..62ba0bd9cbb7 100644
--- a/drivers/spi/spi-microchip-core.c
+++ b/drivers/spi/spi-microchip-core.c
@@ -70,8 +70,7 @@
#define INT_RX_CHANNEL_OVERFLOW BIT(2)
#define INT_TX_CHANNEL_UNDERRUN BIT(3)
-#define INT_ENABLE_MASK (CONTROL_RX_DATA_INT | CONTROL_TX_DATA_INT | \
- CONTROL_RX_OVER_INT | CONTROL_TX_UNDER_INT)
+#define INT_ENABLE_MASK (CONTROL_RX_OVER_INT | CONTROL_TX_UNDER_INT)
#define REG_CONTROL (0x00)
#define REG_FRAME_SIZE (0x04)
@@ -133,10 +132,15 @@ static inline void mchp_corespi_disable(struct mchp_corespi *spi)
mchp_corespi_write(spi, REG_CONTROL, control);
}
-static inline void mchp_corespi_read_fifo(struct mchp_corespi *spi)
+static inline void mchp_corespi_read_fifo(struct mchp_corespi *spi, int fifo_max)
{
- while (spi->rx_len >= spi->n_bytes && !(mchp_corespi_read(spi, REG_STATUS) & STATUS_RXFIFO_EMPTY)) {
- u32 data = mchp_corespi_read(spi, REG_RX_DATA);
+ for (int i = 0; i < fifo_max; i++) {
+ u32 data;
+
+ while (mchp_corespi_read(spi, REG_STATUS) & STATUS_RXFIFO_EMPTY)
+ ;
+
+ data = mchp_corespi_read(spi, REG_RX_DATA);
spi->rx_len -= spi->n_bytes;
@@ -211,11 +215,10 @@ static inline void mchp_corespi_set_xfer_size(struct mchp_corespi *spi, int len)
mchp_corespi_write(spi, REG_FRAMESUP, len);
}
-static inline void mchp_corespi_write_fifo(struct mchp_corespi *spi)
+static inline void mchp_corespi_write_fifo(struct mchp_corespi *spi, int fifo_max)
{
- int fifo_max, i = 0;
+ int i = 0;
- fifo_max = DIV_ROUND_UP(min(spi->tx_len, FIFO_DEPTH), spi->n_bytes);
mchp_corespi_set_xfer_size(spi, fifo_max);
while ((i < fifo_max) && !(mchp_corespi_read(spi, REG_STATUS) & STATUS_TXFIFO_FULL)) {
@@ -413,19 +416,6 @@ static irqreturn_t mchp_corespi_interrupt(int irq, void *dev_id)
if (intfield == 0)
return IRQ_NONE;
- if (intfield & INT_TXDONE)
- mchp_corespi_write(spi, REG_INT_CLEAR, INT_TXDONE);
-
- if (intfield & INT_RXRDY) {
- mchp_corespi_write(spi, REG_INT_CLEAR, INT_RXRDY);
-
- if (spi->rx_len)
- mchp_corespi_read_fifo(spi);
- }
-
- if (!spi->rx_len && !spi->tx_len)
- finalise = true;
-
if (intfield & INT_RX_CHANNEL_OVERFLOW) {
mchp_corespi_write(spi, REG_INT_CLEAR, INT_RX_CHANNEL_OVERFLOW);
finalise = true;
@@ -512,9 +502,14 @@ static int mchp_corespi_transfer_one(struct spi_controller *host,
mchp_corespi_write(spi, REG_SLAVE_SELECT, spi->pending_slave_select);
- while (spi->tx_len)
- mchp_corespi_write_fifo(spi);
+ while (spi->tx_len) {
+ int fifo_max = DIV_ROUND_UP(min(spi->tx_len, FIFO_DEPTH), spi->n_bytes);
+
+ mchp_corespi_write_fifo(spi, fifo_max);
+ mchp_corespi_read_fifo(spi, fifo_max);
+ }
+ spi_finalize_current_transfer(host);
return 1;
}
diff --git a/drivers/thunderbolt/tunnel.c b/drivers/thunderbolt/tunnel.c
index 8229a6fbda5a..717b31d78728 100644
--- a/drivers/thunderbolt/tunnel.c
+++ b/drivers/thunderbolt/tunnel.c
@@ -1009,6 +1009,8 @@ static int tb_dp_dprx_start(struct tb_tunnel *tunnel)
*/
tb_tunnel_get(tunnel);
+ tunnel->dprx_started = true;
+
if (tunnel->callback) {
tunnel->dprx_timeout = dprx_timeout_to_ktime(dprx_timeout);
queue_delayed_work(tunnel->tb->wq, &tunnel->dprx_work, 0);
@@ -1021,9 +1023,12 @@ static int tb_dp_dprx_start(struct tb_tunnel *tunnel)
static void tb_dp_dprx_stop(struct tb_tunnel *tunnel)
{
- tunnel->dprx_canceled = true;
- cancel_delayed_work(&tunnel->dprx_work);
- tb_tunnel_put(tunnel);
+ if (tunnel->dprx_started) {
+ tunnel->dprx_started = false;
+ tunnel->dprx_canceled = true;
+ cancel_delayed_work(&tunnel->dprx_work);
+ tb_tunnel_put(tunnel);
+ }
}
static int tb_dp_activate(struct tb_tunnel *tunnel, bool active)
diff --git a/drivers/thunderbolt/tunnel.h b/drivers/thunderbolt/tunnel.h
index 7f6d3a18a41e..8a0a0cb21a89 100644
--- a/drivers/thunderbolt/tunnel.h
+++ b/drivers/thunderbolt/tunnel.h
@@ -63,6 +63,7 @@ enum tb_tunnel_state {
* @allocated_down: Allocated downstream bandwidth (only for USB3)
* @bw_mode: DP bandwidth allocation mode registers can be used to
* determine consumed and allocated bandwidth
+ * @dprx_started: DPRX negotiation was started (tb_dp_dprx_start() was called for it)
* @dprx_canceled: Was DPRX capabilities read poll canceled
* @dprx_timeout: If set DPRX capabilities read poll work will timeout after this passes
* @dprx_work: Worker that is scheduled to poll completion of DPRX capabilities read
@@ -100,6 +101,7 @@ struct tb_tunnel {
int allocated_up;
int allocated_down;
bool bw_mode;
+ bool dprx_started;
bool dprx_canceled;
ktime_t dprx_timeout;
struct delayed_work dprx_work;
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index e07c5e3eb18c..9b34e23b7091 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -1079,6 +1079,20 @@ static const struct usb_device_id id_table_combined[] = {
.driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
/* GMC devices */
{ USB_DEVICE(GMC_VID, GMC_Z216C_PID) },
+ /* Altera USB Blaster 3 */
+ { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_6022_PID, 1) },
+ { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_6025_PID, 2) },
+ { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_6026_PID, 2) },
+ { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_6026_PID, 3) },
+ { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_6029_PID, 2) },
+ { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602A_PID, 2) },
+ { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602A_PID, 3) },
+ { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602C_PID, 1) },
+ { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602D_PID, 1) },
+ { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602D_PID, 2) },
+ { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602E_PID, 1) },
+ { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602E_PID, 2) },
+ { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602E_PID, 3) },
{ } /* Terminating entry */
};
diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
index 5ee60ba2a73c..52be47d684ea 100644
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -1612,3 +1612,16 @@
*/
#define GMC_VID 0x1cd7
#define GMC_Z216C_PID 0x0217 /* GMC Z216C Adapter IR-USB */
+
+/*
+ * Altera USB Blaster 3 (http://www.altera.com).
+ */
+#define ALTERA_VID 0x09fb
+#define ALTERA_UB3_6022_PID 0x6022
+#define ALTERA_UB3_6025_PID 0x6025
+#define ALTERA_UB3_6026_PID 0x6026
+#define ALTERA_UB3_6029_PID 0x6029
+#define ALTERA_UB3_602A_PID 0x602a
+#define ALTERA_UB3_602C_PID 0x602c
+#define ALTERA_UB3_602D_PID 0x602d
+#define ALTERA_UB3_602E_PID 0x602e
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 58bd54e8c483..5cd26dac2069 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -1368,13 +1368,13 @@ static const struct usb_device_id option_ids[] = {
.driver_info = NCTRL(0) | RSVD(1) },
{ USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1075, 0xff), /* Telit FN990A (PCIe) */
.driver_info = RSVD(0) },
- { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1080, 0xff), /* Telit FE990 (rmnet) */
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1080, 0xff), /* Telit FE990A (rmnet) */
.driver_info = NCTRL(0) | RSVD(1) | RSVD(2) },
- { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1081, 0xff), /* Telit FE990 (MBIM) */
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1081, 0xff), /* Telit FE990A (MBIM) */
.driver_info = NCTRL(0) | RSVD(1) },
- { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1082, 0xff), /* Telit FE990 (RNDIS) */
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1082, 0xff), /* Telit FE990A (RNDIS) */
.driver_info = NCTRL(2) | RSVD(3) },
- { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1083, 0xff), /* Telit FE990 (ECM) */
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1083, 0xff), /* Telit FE990A (ECM) */
.driver_info = NCTRL(0) | RSVD(1) },
{ USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a0, 0xff), /* Telit FN20C04 (rmnet) */
.driver_info = RSVD(0) | NCTRL(3) },
@@ -1388,28 +1388,44 @@ static const struct usb_device_id option_ids[] = {
.driver_info = RSVD(0) | NCTRL(2) | RSVD(3) | RSVD(4) },
{ USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10aa, 0xff), /* Telit FN920C04 (MBIM) */
.driver_info = NCTRL(3) | RSVD(4) | RSVD(5) },
+ { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10b0, 0xff, 0xff, 0x30), /* Telit FE990B (rmnet) */
+ .driver_info = NCTRL(5) },
+ { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10b0, 0xff, 0xff, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10b0, 0xff, 0xff, 0x60) },
+ { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10b1, 0xff, 0xff, 0x30), /* Telit FE990B (MBIM) */
+ .driver_info = NCTRL(6) },
+ { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10b1, 0xff, 0xff, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10b1, 0xff, 0xff, 0x60) },
+ { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10b2, 0xff, 0xff, 0x30), /* Telit FE990B (RNDIS) */
+ .driver_info = NCTRL(6) },
+ { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10b2, 0xff, 0xff, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10b2, 0xff, 0xff, 0x60) },
+ { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10b3, 0xff, 0xff, 0x30), /* Telit FE990B (ECM) */
+ .driver_info = NCTRL(6) },
+ { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10b3, 0xff, 0xff, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10b3, 0xff, 0xff, 0x60) },
{ USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10c0, 0xff), /* Telit FE910C04 (rmnet) */
.driver_info = RSVD(0) | NCTRL(3) },
{ USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10c4, 0xff), /* Telit FE910C04 (rmnet) */
.driver_info = RSVD(0) | NCTRL(3) },
{ USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10c8, 0xff), /* Telit FE910C04 (rmnet) */
.driver_info = RSVD(0) | NCTRL(2) | RSVD(3) | RSVD(4) },
- { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d0, 0x60) }, /* Telit FN990B (rmnet) */
- { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d0, 0x40) },
- { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d0, 0x30),
+ { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d0, 0xff, 0xff, 0x30), /* Telit FN990B (rmnet) */
.driver_info = NCTRL(5) },
- { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d1, 0x60) }, /* Telit FN990B (MBIM) */
- { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d1, 0x40) },
- { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d1, 0x30),
+ { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d0, 0xff, 0xff, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d0, 0xff, 0xff, 0x60) },
+ { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d1, 0xff, 0xff, 0x30), /* Telit FN990B (MBIM) */
.driver_info = NCTRL(6) },
- { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d2, 0x60) }, /* Telit FN990B (RNDIS) */
- { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d2, 0x40) },
- { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d2, 0x30),
+ { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d1, 0xff, 0xff, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d1, 0xff, 0xff, 0x60) },
+ { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d2, 0xff, 0xff, 0x30), /* Telit FN990B (RNDIS) */
.driver_info = NCTRL(6) },
- { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d3, 0x60) }, /* Telit FN990B (ECM) */
- { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d3, 0x40) },
- { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d3, 0x30),
+ { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d2, 0xff, 0xff, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d2, 0xff, 0xff, 0x60) },
+ { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d3, 0xff, 0xff, 0x30), /* Telit FN990B (ECM) */
.driver_info = NCTRL(6) },
+ { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d3, 0xff, 0xff, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x10d3, 0xff, 0xff, 0x60) },
{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910),
.driver_info = NCTRL(0) | RSVD(1) | RSVD(3) },
{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM),
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 6bf1a22c785a..62ca4a0ec55b 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -5117,16 +5117,16 @@ static void run_state_machine(struct tcpm_port *port)
*/
if (port->vbus_never_low) {
port->vbus_never_low = false;
- tcpm_set_state(port, SNK_SOFT_RESET,
- port->timings.sink_wait_cap_time);
+ upcoming_state = SNK_SOFT_RESET;
} else {
if (!port->self_powered)
upcoming_state = SNK_WAIT_CAPABILITIES_TIMEOUT;
else
upcoming_state = hard_reset_state(port);
- tcpm_set_state(port, SNK_WAIT_CAPABILITIES_TIMEOUT,
- port->timings.sink_wait_cap_time);
}
+
+ tcpm_set_state(port, upcoming_state,
+ port->timings.sink_wait_cap_time);
break;
case SNK_WAIT_CAPABILITIES_TIMEOUT:
/*
diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c
index 7fdb5edd7e2e..75338ffc703f 100644
--- a/drivers/video/fbdev/hyperv_fb.c
+++ b/drivers/video/fbdev/hyperv_fb.c
@@ -282,6 +282,8 @@ static uint screen_depth;
static uint screen_fb_size;
static uint dio_fb_size; /* FB size for deferred IO */
+static void hvfb_putmem(struct fb_info *info);
+
/* Send message to Hyper-V host */
static inline int synthvid_send(struct hv_device *hdev,
struct synthvid_msg *msg)
@@ -863,6 +865,17 @@ static void hvfb_ops_damage_area(struct fb_info *info, u32 x, u32 y, u32 width,
}
/*
+ * fb_ops.fb_destroy is called by the last put_fb_info() call at the end
+ * of unregister_framebuffer() or fb_release(). Do any cleanup related to
+ * framebuffer here.
+ */
+static void hvfb_destroy(struct fb_info *info)
+{
+ hvfb_putmem(info);
+ framebuffer_release(info);
+}
+
+/*
* TODO: GEN1 codepaths allocate from system or DMA-able memory. Fix the
* driver to use the _SYSMEM_ or _DMAMEM_ helpers in these cases.
*/
@@ -877,6 +890,7 @@ static const struct fb_ops hvfb_ops = {
.fb_set_par = hvfb_set_par,
.fb_setcolreg = hvfb_setcolreg,
.fb_blank = hvfb_blank,
+ .fb_destroy = hvfb_destroy,
};
/* Get options from kernel paramenter "video=" */
@@ -952,7 +966,7 @@ static phys_addr_t hvfb_get_phymem(struct hv_device *hdev,
}
/* Release contiguous physical memory */
-static void hvfb_release_phymem(struct hv_device *hdev,
+static void hvfb_release_phymem(struct device *device,
phys_addr_t paddr, unsigned int size)
{
unsigned int order = get_order(size);
@@ -960,7 +974,7 @@ static void hvfb_release_phymem(struct hv_device *hdev,
if (order <= MAX_PAGE_ORDER)
__free_pages(pfn_to_page(paddr >> PAGE_SHIFT), order);
else
- dma_free_coherent(&hdev->device,
+ dma_free_coherent(device,
round_up(size, PAGE_SIZE),
phys_to_virt(paddr),
paddr);
@@ -989,6 +1003,7 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info)
base = pci_resource_start(pdev, 0);
size = pci_resource_len(pdev, 0);
+ aperture_remove_conflicting_devices(base, size, KBUILD_MODNAME);
/*
* For Gen 1 VM, we can directly use the contiguous memory
@@ -1010,11 +1025,21 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info)
goto getmem_done;
}
pr_info("Unable to allocate enough contiguous physical memory on Gen 1 VM. Using MMIO instead.\n");
+ } else {
+ aperture_remove_all_conflicting_devices(KBUILD_MODNAME);
}
/*
- * Cannot use the contiguous physical memory.
- * Allocate mmio space for framebuffer.
+ * Cannot use contiguous physical memory, so allocate MMIO space for
+ * the framebuffer. At this point in the function, conflicting devices
+ * that might have claimed the framebuffer MMIO space based on
+ * screen_info.lfb_base must have already been removed so that
+ * vmbus_allocate_mmio() does not allocate different MMIO space. If the
+ * kdump image were to be loaded using kexec_file_load(), the
+ * framebuffer location in the kdump image would be set from
+ * screen_info.lfb_base at the time that kdump is enabled. If the
+ * framebuffer has moved elsewhere, this could be the wrong location,
+ * causing kdump to hang when efifb (for example) loads.
*/
dio_fb_size =
screen_width * screen_height * screen_depth / 8;
@@ -1051,11 +1076,6 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info)
info->screen_size = dio_fb_size;
getmem_done:
- if (base && size)
- aperture_remove_conflicting_devices(base, size, KBUILD_MODNAME);
- else
- aperture_remove_all_conflicting_devices(KBUILD_MODNAME);
-
if (!gen2vm)
pci_dev_put(pdev);
@@ -1074,16 +1094,16 @@ err1:
}
/* Release the framebuffer */
-static void hvfb_putmem(struct hv_device *hdev, struct fb_info *info)
+static void hvfb_putmem(struct fb_info *info)
{
struct hvfb_par *par = info->par;
if (par->need_docopy) {
vfree(par->dio_vp);
- iounmap(info->screen_base);
+ iounmap(par->mmio_vp);
vmbus_free_mmio(par->mem->start, screen_fb_size);
} else {
- hvfb_release_phymem(hdev, info->fix.smem_start,
+ hvfb_release_phymem(info->device, info->fix.smem_start,
screen_fb_size);
}
@@ -1172,7 +1192,7 @@ static int hvfb_probe(struct hv_device *hdev,
if (ret)
goto error;
- ret = register_framebuffer(info);
+ ret = devm_register_framebuffer(&hdev->device, info);
if (ret) {
pr_err("Unable to register framebuffer\n");
goto error;
@@ -1197,7 +1217,7 @@ static int hvfb_probe(struct hv_device *hdev,
error:
fb_deferred_io_cleanup(info);
- hvfb_putmem(hdev, info);
+ hvfb_putmem(info);
error2:
vmbus_close(hdev->channel);
error1:
@@ -1220,14 +1240,10 @@ static void hvfb_remove(struct hv_device *hdev)
fb_deferred_io_cleanup(info);
- unregister_framebuffer(info);
cancel_delayed_work_sync(&par->dwork);
vmbus_close(hdev->channel);
hv_set_drvdata(hdev, NULL);
-
- hvfb_putmem(hdev, info);
- framebuffer_release(info);
}
static int hvfb_suspend(struct hv_device *hdev)
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index cee42646736c..96a6781f3653 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -64,7 +64,8 @@ static struct afs_cell *afs_find_cell_locked(struct afs_net *net,
return ERR_PTR(-ENAMETOOLONG);
if (!name) {
- cell = net->ws_cell;
+ cell = rcu_dereference_protected(net->ws_cell,
+ lockdep_is_held(&net->cells_lock));
if (!cell)
return ERR_PTR(-EDESTADDRREQ);
goto found;
@@ -388,8 +389,8 @@ int afs_cell_init(struct afs_net *net, const char *rootcell)
/* install the new cell */
down_write(&net->cells_lock);
afs_see_cell(new_root, afs_cell_trace_see_ws);
- old_root = net->ws_cell;
- net->ws_cell = new_root;
+ old_root = rcu_replace_pointer(net->ws_cell, new_root,
+ lockdep_is_held(&net->cells_lock));
up_write(&net->cells_lock);
afs_unuse_cell(net, old_root, afs_cell_trace_unuse_ws);
@@ -945,8 +946,8 @@ void afs_cell_purge(struct afs_net *net)
_enter("");
down_write(&net->cells_lock);
- ws = net->ws_cell;
- net->ws_cell = NULL;
+ ws = rcu_replace_pointer(net->ws_cell, NULL,
+ lockdep_is_held(&net->cells_lock));
up_write(&net->cells_lock);
afs_unuse_cell(net, ws, afs_cell_trace_unuse_ws);
diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c
index d8bf52f77d93..008698d706ca 100644
--- a/fs/afs/dynroot.c
+++ b/fs/afs/dynroot.c
@@ -314,12 +314,23 @@ static const char *afs_atcell_get_link(struct dentry *dentry, struct inode *inod
const char *name;
bool dotted = vnode->fid.vnode == 3;
- if (!net->ws_cell)
+ if (!dentry) {
+ /* We're in RCU-pathwalk. */
+ cell = rcu_dereference(net->ws_cell);
+ if (dotted)
+ name = cell->name - 1;
+ else
+ name = cell->name;
+ /* Shouldn't need to set a delayed call. */
+ return name;
+ }
+
+ if (!rcu_access_pointer(net->ws_cell))
return ERR_PTR(-ENOENT);
down_read(&net->cells_lock);
- cell = net->ws_cell;
+ cell = rcu_dereference_protected(net->ws_cell, lockdep_is_held(&net->cells_lock));
if (dotted)
name = cell->name - 1;
else
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 90f407774a9a..df30bd62da79 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -287,7 +287,7 @@ struct afs_net {
/* Cell database */
struct rb_root cells;
- struct afs_cell *ws_cell;
+ struct afs_cell __rcu *ws_cell;
struct work_struct cells_manager;
struct timer_list cells_timer;
atomic_t cells_outstanding;
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index e7614f4f30c2..12c88d8be3fe 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -206,7 +206,7 @@ static int afs_proc_rootcell_show(struct seq_file *m, void *v)
net = afs_seq2net_single(m);
down_read(&net->cells_lock);
- cell = net->ws_cell;
+ cell = rcu_dereference_protected(net->ws_cell, lockdep_is_held(&net->cells_lock));
if (cell)
seq_printf(m, "%s\n", cell->name);
up_read(&net->cells_lock);
@@ -242,7 +242,7 @@ static int afs_proc_rootcell_write(struct file *file, char *buf, size_t size)
ret = -EEXIST;
inode_lock(file_inode(file));
- if (!net->ws_cell)
+ if (!rcu_access_pointer(net->ws_cell))
ret = afs_cell_init(net, buf);
else
printk("busy\n");
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index dece27d9db04..756736f9243d 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -1186,7 +1186,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
le64_to_cpu(i->journal_seq),
b->written, b->written + sectors, ptr_written);
- b->written += sectors;
+ b->written = min(b->written + sectors, btree_sectors(c));
if (blacklisted && !first)
continue;
diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h
index 8f22ef9a7651..47d8690f01bf 100644
--- a/fs/bcachefs/btree_update.h
+++ b/fs/bcachefs/btree_update.h
@@ -126,10 +126,18 @@ bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsigned u64s)
int bch2_btree_insert_clone_trans(struct btree_trans *, enum btree_id, struct bkey_i *);
+int bch2_btree_write_buffer_insert_err(struct btree_trans *,
+ enum btree_id, struct bkey_i *);
+
static inline int __must_check bch2_trans_update_buffered(struct btree_trans *trans,
enum btree_id btree,
struct bkey_i *k)
{
+ if (unlikely(!btree_type_uses_write_buffer(btree))) {
+ int ret = bch2_btree_write_buffer_insert_err(trans, btree, k);
+ dump_stack();
+ return ret;
+ }
/*
* Most updates skip the btree write buffer until journal replay is
* finished because synchronization with journal replay relies on having
diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c
index b56c4987b8c9..2c09d19dd621 100644
--- a/fs/bcachefs/btree_write_buffer.c
+++ b/fs/bcachefs/btree_write_buffer.c
@@ -264,6 +264,22 @@ out:
BUG_ON(wb->sorted.size < wb->flushing.keys.nr);
}
+int bch2_btree_write_buffer_insert_err(struct btree_trans *trans,
+ enum btree_id btree, struct bkey_i *k)
+{
+ struct bch_fs *c = trans->c;
+ struct printbuf buf = PRINTBUF;
+
+ prt_printf(&buf, "attempting to do write buffer update on non wb btree=");
+ bch2_btree_id_to_text(&buf, btree);
+ prt_str(&buf, "\n");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
+
+ bch2_fs_inconsistent(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ return -EROFS;
+}
+
static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
{
struct bch_fs *c = trans->c;
@@ -312,7 +328,10 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
darray_for_each(wb->sorted, i) {
struct btree_write_buffered_key *k = &wb->flushing.keys.data[i->idx];
- BUG_ON(!btree_type_uses_write_buffer(k->btree));
+ if (unlikely(!btree_type_uses_write_buffer(k->btree))) {
+ ret = bch2_btree_write_buffer_insert_err(trans, k->btree, &k->k);
+ goto err;
+ }
for (struct wb_key_ref *n = i + 1; n < min(i + 4, &darray_top(wb->sorted)); n++)
prefetch(&wb->flushing.keys.data[n->idx]);
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 05d5f71a7ca9..2d8042f853dc 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -99,7 +99,7 @@ static inline bool ptr_better(struct bch_fs *c,
/* Pick at random, biased in favor of the faster device: */
- return bch2_rand_range(l1 + l2) > l1;
+ return bch2_get_random_u64_below(l1 + l2) > l1;
}
if (bch2_force_reconstruct_read)
diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c
index 04ec05206f8c..339b80770f1d 100644
--- a/fs/bcachefs/inode.c
+++ b/fs/bcachefs/inode.c
@@ -1198,6 +1198,7 @@ void bch2_inode_opts_get(struct bch_io_opts *opts, struct bch_fs *c,
opts->_name##_from_inode = true; \
} else { \
opts->_name = c->opts._name; \
+ opts->_name##_from_inode = false; \
}
BCH_INODE_OPTS()
#undef x
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index 8c7b2d3d779d..aa91fcf51eec 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -59,7 +59,7 @@ static bool bch2_target_congested(struct bch_fs *c, u16 target)
}
rcu_read_unlock();
- return bch2_rand_range(nr * CONGESTED_MAX) < total;
+ return get_random_u32_below(nr * CONGESTED_MAX) < total;
}
#else
@@ -951,12 +951,6 @@ retry_pick:
goto retry_pick;
}
- /*
- * Unlock the iterator while the btree node's lock is still in
- * cache, before doing the IO:
- */
- bch2_trans_unlock(trans);
-
if (flags & BCH_READ_NODECODE) {
/*
* can happen if we retry, and the extent we were going to read
@@ -1113,6 +1107,15 @@ get_bio:
trace_and_count(c, read_split, &orig->bio);
}
+ /*
+ * Unlock the iterator while the btree node's lock is still in
+ * cache, before doing the IO:
+ */
+ if (!(flags & BCH_READ_IN_RETRY))
+ bch2_trans_unlock(trans);
+ else
+ bch2_trans_unlock_long(trans);
+
if (!rbio->pick.idx) {
if (unlikely(!rbio->have_ioref)) {
struct printbuf buf = PRINTBUF;
@@ -1160,6 +1163,8 @@ out:
if (likely(!(flags & BCH_READ_IN_RETRY))) {
return 0;
} else {
+ bch2_trans_unlock(trans);
+
int ret;
rbio->context = RBIO_CONTEXT_UNBOUND;
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 6d97d412fed9..0459c875e189 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -1811,7 +1811,11 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
goto err_late;
up_write(&c->state_lock);
- return 0;
+out:
+ printbuf_exit(&label);
+ printbuf_exit(&errbuf);
+ bch_err_fn(c, ret);
+ return ret;
err_unlock:
mutex_unlock(&c->sb_lock);
@@ -1820,10 +1824,7 @@ err:
if (ca)
bch2_dev_free(ca);
bch2_free_super(&sb);
- printbuf_exit(&label);
- printbuf_exit(&errbuf);
- bch_err_fn(c, ret);
- return ret;
+ goto out;
err_late:
up_write(&c->state_lock);
ca = NULL;
diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
index e0a876cbaa6b..da2cd11b3025 100644
--- a/fs/bcachefs/util.c
+++ b/fs/bcachefs/util.c
@@ -653,19 +653,25 @@ int bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask)
return 0;
}
-size_t bch2_rand_range(size_t max)
+u64 bch2_get_random_u64_below(u64 ceil)
{
- size_t rand;
+ if (ceil <= U32_MAX)
+ return __get_random_u32_below(ceil);
- if (!max)
- return 0;
+ /* this is the same (clever) algorithm as in __get_random_u32_below() */
+ u64 rand = get_random_u64();
+ u64 mult = ceil * rand;
- do {
- rand = get_random_long();
- rand &= roundup_pow_of_two(max) - 1;
- } while (rand >= max);
+ if (unlikely(mult < ceil)) {
+ u64 bound;
+ div64_u64_rem(-ceil, ceil, &bound);
+ while (unlikely(mult < bound)) {
+ rand = get_random_u64();
+ mult = ceil * rand;
+ }
+ }
- return rand;
+ return mul_u64_u64_shr(ceil, rand, 64);
}
void memcpy_to_bio(struct bio *dst, struct bvec_iter dst_iter, const void *src)
diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index e7c3541b38f3..f4a4783219d9 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -401,7 +401,7 @@ do { \
_ret; \
})
-size_t bch2_rand_range(size_t);
+u64 bch2_get_random_u64_below(u64);
void memcpy_to_bio(struct bio *, struct bvec_iter, const void *);
void memcpy_from_bio(void *, struct bio *, struct bvec_iter);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index a5205149adba..3bd96c3d4cd0 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -756,9 +756,6 @@ retry:
return VM_FAULT_SIGBUS;
}
} else {
- result = filemap_fsnotify_fault(vmf);
- if (unlikely(result))
- return result;
filemap_invalidate_lock_shared(mapping);
}
result = dax_iomap_fault(vmf, order, &pfn, &error, &ext4_iomap_ops);
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index f917de020dd5..73f93a35eedd 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -1825,9 +1825,8 @@ static int match_session(struct cifs_ses *ses,
struct smb3_fs_context *ctx,
bool match_super)
{
- if (ctx->sectype != Unspecified &&
- ctx->sectype != ses->sectype)
- return 0;
+ struct TCP_Server_Info *server = ses->server;
+ enum securityEnum ctx_sec, ses_sec;
if (!match_super && ctx->dfs_root_ses != ses->dfs_root_ses)
return 0;
@@ -1839,11 +1838,20 @@ static int match_session(struct cifs_ses *ses,
if (ses->chan_max < ctx->max_channels)
return 0;
- switch (ses->sectype) {
+ ctx_sec = server->ops->select_sectype(server, ctx->sectype);
+ ses_sec = server->ops->select_sectype(server, ses->sectype);
+
+ if (ctx_sec != ses_sec)
+ return 0;
+
+ switch (ctx_sec) {
+ case IAKerb:
case Kerberos:
if (!uid_eq(ctx->cred_uid, ses->cred_uid))
return 0;
break;
+ case NTLMv2:
+ case RawNTLMSSP:
default:
/* NULL username means anonymous session */
if (ses->user_name == NULL) {
diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c
index e9b286d9a7ba..8c73d4d60d1a 100644
--- a/fs/smb/client/fs_context.c
+++ b/fs/smb/client/fs_context.c
@@ -171,6 +171,7 @@ const struct fs_parameter_spec smb3_fs_parameters[] = {
fsparam_string("username", Opt_user),
fsparam_string("pass", Opt_pass),
fsparam_string("password", Opt_pass),
+ fsparam_string("pass2", Opt_pass2),
fsparam_string("password2", Opt_pass2),
fsparam_string("ip", Opt_ip),
fsparam_string("addr", Opt_ip),
@@ -1131,6 +1132,9 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
} else if (!strcmp("user", param->key) || !strcmp("username", param->key)) {
skip_parsing = true;
opt = Opt_user;
+ } else if (!strcmp("pass2", param->key) || !strcmp("password2", param->key)) {
+ skip_parsing = true;
+ opt = Opt_pass2;
}
}
@@ -1340,21 +1344,21 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
}
break;
case Opt_acregmax:
- ctx->acregmax = HZ * result.uint_32;
- if (ctx->acregmax > CIFS_MAX_ACTIMEO) {
+ if (result.uint_32 > CIFS_MAX_ACTIMEO / HZ) {
cifs_errorf(fc, "acregmax too large\n");
goto cifs_parse_mount_err;
}
+ ctx->acregmax = HZ * result.uint_32;
break;
case Opt_acdirmax:
- ctx->acdirmax = HZ * result.uint_32;
- if (ctx->acdirmax > CIFS_MAX_ACTIMEO) {
+ if (result.uint_32 > CIFS_MAX_ACTIMEO / HZ) {
cifs_errorf(fc, "acdirmax too large\n");
goto cifs_parse_mount_err;
}
+ ctx->acdirmax = HZ * result.uint_32;
break;
case Opt_actimeo:
- if (HZ * result.uint_32 > CIFS_MAX_ACTIMEO) {
+ if (result.uint_32 > CIFS_MAX_ACTIMEO / HZ) {
cifs_errorf(fc, "timeout too large\n");
goto cifs_parse_mount_err;
}
@@ -1366,11 +1370,11 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
ctx->acdirmax = ctx->acregmax = HZ * result.uint_32;
break;
case Opt_closetimeo:
- ctx->closetimeo = HZ * result.uint_32;
- if (ctx->closetimeo > SMB3_MAX_DCLOSETIMEO) {
+ if (result.uint_32 > SMB3_MAX_DCLOSETIMEO / HZ) {
cifs_errorf(fc, "closetimeo too large\n");
goto cifs_parse_mount_err;
}
+ ctx->closetimeo = HZ * result.uint_32;
break;
case Opt_echo_interval:
ctx->echo_interval = result.uint_32;
diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c
index f8a40f65db6a..c1f22c129111 100644
--- a/fs/smb/server/connection.c
+++ b/fs/smb/server/connection.c
@@ -433,6 +433,26 @@ void ksmbd_conn_init_server_callbacks(struct ksmbd_conn_ops *ops)
default_conn_ops.terminate_fn = ops->terminate_fn;
}
+void ksmbd_conn_r_count_inc(struct ksmbd_conn *conn)
+{
+ atomic_inc(&conn->r_count);
+}
+
+void ksmbd_conn_r_count_dec(struct ksmbd_conn *conn)
+{
+ /*
+ * Checking waitqueue to dropping pending requests on
+ * disconnection. waitqueue_active is safe because it
+ * uses atomic operation for condition.
+ */
+ atomic_inc(&conn->refcnt);
+ if (!atomic_dec_return(&conn->r_count) && waitqueue_active(&conn->r_count_q))
+ wake_up(&conn->r_count_q);
+
+ if (atomic_dec_and_test(&conn->refcnt))
+ kfree(conn);
+}
+
int ksmbd_conn_transport_init(void)
{
int ret;
diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h
index b379ae4fdcdf..91c2318639e7 100644
--- a/fs/smb/server/connection.h
+++ b/fs/smb/server/connection.h
@@ -168,6 +168,8 @@ int ksmbd_conn_transport_init(void);
void ksmbd_conn_transport_destroy(void);
void ksmbd_conn_lock(struct ksmbd_conn *conn);
void ksmbd_conn_unlock(struct ksmbd_conn *conn);
+void ksmbd_conn_r_count_inc(struct ksmbd_conn *conn);
+void ksmbd_conn_r_count_dec(struct ksmbd_conn *conn);
/*
* WARNING
diff --git a/fs/smb/server/ksmbd_work.c b/fs/smb/server/ksmbd_work.c
index 4af2e6007c29..72b00ca6e455 100644
--- a/fs/smb/server/ksmbd_work.c
+++ b/fs/smb/server/ksmbd_work.c
@@ -26,7 +26,6 @@ struct ksmbd_work *ksmbd_alloc_work_struct(void)
INIT_LIST_HEAD(&work->request_entry);
INIT_LIST_HEAD(&work->async_request_entry);
INIT_LIST_HEAD(&work->fp_entry);
- INIT_LIST_HEAD(&work->interim_entry);
INIT_LIST_HEAD(&work->aux_read_list);
work->iov_alloc_cnt = 4;
work->iov = kcalloc(work->iov_alloc_cnt, sizeof(struct kvec),
@@ -56,8 +55,6 @@ void ksmbd_free_work_struct(struct ksmbd_work *work)
kfree(work->tr_buf);
kvfree(work->request_buf);
kfree(work->iov);
- if (!list_empty(&work->interim_entry))
- list_del(&work->interim_entry);
if (work->async_id)
ksmbd_release_id(&work->conn->async_ida, work->async_id);
diff --git a/fs/smb/server/ksmbd_work.h b/fs/smb/server/ksmbd_work.h
index 8ca2c813246e..d36393ff8310 100644
--- a/fs/smb/server/ksmbd_work.h
+++ b/fs/smb/server/ksmbd_work.h
@@ -89,7 +89,6 @@ struct ksmbd_work {
/* List head at conn->async_requests */
struct list_head async_request_entry;
struct list_head fp_entry;
- struct list_head interim_entry;
};
/**
diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c
index 3a3fe4afbdf0..28886ff1ee57 100644
--- a/fs/smb/server/oplock.c
+++ b/fs/smb/server/oplock.c
@@ -46,7 +46,6 @@ static struct oplock_info *alloc_opinfo(struct ksmbd_work *work,
opinfo->fid = id;
opinfo->Tid = Tid;
INIT_LIST_HEAD(&opinfo->op_entry);
- INIT_LIST_HEAD(&opinfo->interim_list);
init_waitqueue_head(&opinfo->oplock_q);
init_waitqueue_head(&opinfo->oplock_brk);
atomic_set(&opinfo->refcount, 1);
@@ -635,6 +634,7 @@ static void __smb2_oplock_break_noti(struct work_struct *wk)
{
struct smb2_oplock_break *rsp = NULL;
struct ksmbd_work *work = container_of(wk, struct ksmbd_work, work);
+ struct ksmbd_conn *conn = work->conn;
struct oplock_break_info *br_info = work->request_buf;
struct smb2_hdr *rsp_hdr;
struct ksmbd_file *fp;
@@ -690,6 +690,7 @@ static void __smb2_oplock_break_noti(struct work_struct *wk)
out:
ksmbd_free_work_struct(work);
+ ksmbd_conn_r_count_dec(conn);
}
/**
@@ -724,6 +725,7 @@ static int smb2_oplock_break_noti(struct oplock_info *opinfo)
work->sess = opinfo->sess;
if (opinfo->op_state == OPLOCK_ACK_WAIT) {
+ ksmbd_conn_r_count_inc(conn);
INIT_WORK(&work->work, __smb2_oplock_break_noti);
ksmbd_queue_work(work);
@@ -745,6 +747,7 @@ static void __smb2_lease_break_noti(struct work_struct *wk)
{
struct smb2_lease_break *rsp = NULL;
struct ksmbd_work *work = container_of(wk, struct ksmbd_work, work);
+ struct ksmbd_conn *conn = work->conn;
struct lease_break_info *br_info = work->request_buf;
struct smb2_hdr *rsp_hdr;
@@ -791,6 +794,7 @@ static void __smb2_lease_break_noti(struct work_struct *wk)
out:
ksmbd_free_work_struct(work);
+ ksmbd_conn_r_count_dec(conn);
}
/**
@@ -803,7 +807,6 @@ out:
static int smb2_lease_break_noti(struct oplock_info *opinfo)
{
struct ksmbd_conn *conn = opinfo->conn;
- struct list_head *tmp, *t;
struct ksmbd_work *work;
struct lease_break_info *br_info;
struct lease *lease = opinfo->o_lease;
@@ -831,16 +834,7 @@ static int smb2_lease_break_noti(struct oplock_info *opinfo)
work->sess = opinfo->sess;
if (opinfo->op_state == OPLOCK_ACK_WAIT) {
- list_for_each_safe(tmp, t, &opinfo->interim_list) {
- struct ksmbd_work *in_work;
-
- in_work = list_entry(tmp, struct ksmbd_work,
- interim_entry);
- setup_async_work(in_work, NULL, NULL);
- smb2_send_interim_resp(in_work, STATUS_PENDING);
- list_del_init(&in_work->interim_entry);
- release_async_work(in_work);
- }
+ ksmbd_conn_r_count_inc(conn);
INIT_WORK(&work->work, __smb2_lease_break_noti);
ksmbd_queue_work(work);
wait_for_break_ack(opinfo);
@@ -871,7 +865,8 @@ static void wait_lease_breaking(struct oplock_info *opinfo)
}
}
-static int oplock_break(struct oplock_info *brk_opinfo, int req_op_level)
+static int oplock_break(struct oplock_info *brk_opinfo, int req_op_level,
+ struct ksmbd_work *in_work)
{
int err = 0;
@@ -914,9 +909,15 @@ static int oplock_break(struct oplock_info *brk_opinfo, int req_op_level)
}
if (lease->state & (SMB2_LEASE_WRITE_CACHING_LE |
- SMB2_LEASE_HANDLE_CACHING_LE))
+ SMB2_LEASE_HANDLE_CACHING_LE)) {
+ if (in_work) {
+ setup_async_work(in_work, NULL, NULL);
+ smb2_send_interim_resp(in_work, STATUS_PENDING);
+ release_async_work(in_work);
+ }
+
brk_opinfo->op_state = OPLOCK_ACK_WAIT;
- else
+ } else
atomic_dec(&brk_opinfo->breaking_cnt);
} else {
err = oplock_break_pending(brk_opinfo, req_op_level);
@@ -1116,7 +1117,7 @@ void smb_send_parent_lease_break_noti(struct ksmbd_file *fp,
if (ksmbd_conn_releasing(opinfo->conn))
continue;
- oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE);
+ oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE, NULL);
opinfo_put(opinfo);
}
}
@@ -1152,7 +1153,7 @@ void smb_lazy_parent_lease_break_close(struct ksmbd_file *fp)
if (ksmbd_conn_releasing(opinfo->conn))
continue;
- oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE);
+ oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE, NULL);
opinfo_put(opinfo);
}
}
@@ -1252,8 +1253,7 @@ int smb_grant_oplock(struct ksmbd_work *work, int req_op_level, u64 pid,
goto op_break_not_needed;
}
- list_add(&work->interim_entry, &prev_opinfo->interim_list);
- err = oplock_break(prev_opinfo, SMB2_OPLOCK_LEVEL_II);
+ err = oplock_break(prev_opinfo, SMB2_OPLOCK_LEVEL_II, work);
opinfo_put(prev_opinfo);
if (err == -ENOENT)
goto set_lev;
@@ -1322,8 +1322,7 @@ static void smb_break_all_write_oplock(struct ksmbd_work *work,
}
brk_opinfo->open_trunc = is_trunc;
- list_add(&work->interim_entry, &brk_opinfo->interim_list);
- oplock_break(brk_opinfo, SMB2_OPLOCK_LEVEL_II);
+ oplock_break(brk_opinfo, SMB2_OPLOCK_LEVEL_II, work);
opinfo_put(brk_opinfo);
}
@@ -1386,7 +1385,7 @@ void smb_break_all_levII_oplock(struct ksmbd_work *work, struct ksmbd_file *fp,
SMB2_LEASE_KEY_SIZE))
goto next;
brk_op->open_trunc = is_trunc;
- oplock_break(brk_op, SMB2_OPLOCK_LEVEL_NONE);
+ oplock_break(brk_op, SMB2_OPLOCK_LEVEL_NONE, NULL);
next:
opinfo_put(brk_op);
rcu_read_lock();
diff --git a/fs/smb/server/oplock.h b/fs/smb/server/oplock.h
index 72bc88a63a40..3f64f0787263 100644
--- a/fs/smb/server/oplock.h
+++ b/fs/smb/server/oplock.h
@@ -67,7 +67,6 @@ struct oplock_info {
bool is_lease;
bool open_trunc; /* truncate on open */
struct lease *o_lease;
- struct list_head interim_list;
struct list_head op_entry;
struct list_head lease_entry;
wait_queue_head_t oplock_q; /* Other server threads */
diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c
index 601e7fcbcf1e..ab533c602987 100644
--- a/fs/smb/server/server.c
+++ b/fs/smb/server/server.c
@@ -270,17 +270,7 @@ static void handle_ksmbd_work(struct work_struct *wk)
ksmbd_conn_try_dequeue_request(work);
ksmbd_free_work_struct(work);
- /*
- * Checking waitqueue to dropping pending requests on
- * disconnection. waitqueue_active is safe because it
- * uses atomic operation for condition.
- */
- atomic_inc(&conn->refcnt);
- if (!atomic_dec_return(&conn->r_count) && waitqueue_active(&conn->r_count_q))
- wake_up(&conn->r_count_q);
-
- if (atomic_dec_and_test(&conn->refcnt))
- kfree(conn);
+ ksmbd_conn_r_count_dec(conn);
}
/**
@@ -310,7 +300,7 @@ static int queue_ksmbd_work(struct ksmbd_conn *conn)
conn->request_buf = NULL;
ksmbd_conn_enqueue_request(work);
- atomic_inc(&conn->r_count);
+ ksmbd_conn_r_count_inc(conn);
/* update activity on connection */
conn->last_active = jiffies;
INIT_WORK(&work->work, handle_ksmbd_work);
diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c
index 1d94bb784108..0bc96ab6580b 100644
--- a/fs/vboxsf/super.c
+++ b/fs/vboxsf/super.c
@@ -21,8 +21,7 @@
#define VBOXSF_SUPER_MAGIC 0x786f4256 /* 'VBox' little endian */
-static const unsigned char VBSF_MOUNT_SIGNATURE[4] = { '\000', '\377', '\376',
- '\375' };
+static const unsigned char VBSF_MOUNT_SIGNATURE[4] __nonstring = "\000\377\376\375";
static int follow_symlinks;
module_param(follow_symlinks, int, 0444);
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 3d33e17f2e5c..7839efe050bf 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -33,8 +33,6 @@ struct kmem_cache *xfs_extfree_item_cache;
struct workqueue_struct *xfs_alloc_wq;
-#define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
-
#define XFSA_FIXUP_BNO_OK 1
#define XFSA_FIXUP_CNT_OK 2
@@ -410,8 +408,8 @@ xfs_alloc_compute_diff(
if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
if (newlen1 < newlen2 ||
(newlen1 == newlen2 &&
- XFS_ABSDIFF(newbno1, wantbno) >
- XFS_ABSDIFF(newbno2, wantbno)))
+ abs_diff(newbno1, wantbno) >
+ abs_diff(newbno2, wantbno)))
newbno1 = newbno2;
} else if (newbno2 != NULLAGBLOCK)
newbno1 = newbno2;
@@ -427,7 +425,7 @@ xfs_alloc_compute_diff(
} else
newbno1 = freeend - wantlen;
*newbnop = newbno1;
- return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
+ return newbno1 == NULLAGBLOCK ? 0 : abs_diff(newbno1, wantbno);
}
/*
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index f7a7d89c345e..9a435b1ff264 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1451,9 +1451,6 @@ xfs_dax_read_fault(
trace_xfs_read_fault(ip, order);
- ret = filemap_fsnotify_fault(vmf);
- if (unlikely(ret))
- return ret;
xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
ret = xfs_dax_fault_locked(vmf, order, false);
xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
@@ -1482,16 +1479,6 @@ xfs_write_fault(
vm_fault_t ret;
trace_xfs_write_fault(ip, order);
- /*
- * Usually we get here from ->page_mkwrite callback but in case of DAX
- * we will get here also for ordinary write fault. Handle HSM
- * notifications for that case.
- */
- if (IS_DAX(inode)) {
- ret = filemap_fsnotify_fault(vmf);
- if (unlikely(ret))
- return ret;
- }
sb_start_pagefault(inode->i_sb);
file_update_time(vmf->vma->vm_file);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 71f4f0cc3dac..aba9c24486aa 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -852,12 +852,20 @@ static inline bool blk_mq_is_reserved_rq(struct request *rq)
return rq->rq_flags & RQF_RESV;
}
-/*
+/**
+ * blk_mq_add_to_batch() - add a request to the completion batch
+ * @req: The request to add to batch
+ * @iob: The batch to add the request
+ * @is_error: Specify true if the request failed with an error
+ * @complete: The completaion handler for the request
+ *
* Batched completions only work when there is no I/O error and no special
* ->end_io handler.
+ *
+ * Return: true when the request was added to the batch, otherwise false
*/
static inline bool blk_mq_add_to_batch(struct request *req,
- struct io_comp_batch *iob, int ioerror,
+ struct io_comp_batch *iob, bool is_error,
void (*complete)(struct io_comp_batch *))
{
/*
@@ -865,7 +873,7 @@ static inline bool blk_mq_add_to_batch(struct request *req,
* 1) No batch container
* 2) Has scheduler data attached
* 3) Not a passthrough request and end_io set
- * 4) Not a passthrough request and an ioerror
+ * 4) Not a passthrough request and failed with an error
*/
if (!iob)
return false;
@@ -874,7 +882,7 @@ static inline bool blk_mq_add_to_batch(struct request *req,
if (!blk_rq_is_passthrough(req)) {
if (req->end_io)
return false;
- if (ioerror < 0)
+ if (is_error)
return false;
}
diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h
index ec00e3f7af2b..ee2614adb785 100644
--- a/include/linux/cleanup.h
+++ b/include/linux/cleanup.h
@@ -212,7 +212,7 @@ const volatile void * __must_check_fn(const volatile void *val)
{ return val; }
#define no_free_ptr(p) \
- ((typeof(p)) __must_check_fn(__get_and_null(p, NULL)))
+ ((typeof(p)) __must_check_fn((__force const volatile void *)__get_and_null(p, NULL)))
#define return_ptr(p) return no_free_ptr(p)
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 6a33288bd6a1..83d3ac97f826 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -171,6 +171,21 @@ static inline int fsnotify_file_area_perm(struct file *file, int perm_mask,
}
/*
+ * fsnotify_mmap_perm - permission hook before mmap of file range
+ */
+static inline int fsnotify_mmap_perm(struct file *file, int prot,
+ const loff_t off, size_t len)
+{
+ /*
+ * mmap() generates only pre-content events.
+ */
+ if (!file || likely(!FMODE_FSNOTIFY_HSM(file->f_mode)))
+ return 0;
+
+ return fsnotify_pre_content(&file->f_path, &off, len);
+}
+
+/*
* fsnotify_truncate_perm - permission hook before file truncate
*/
static inline int fsnotify_truncate_perm(const struct path *path, loff_t length)
@@ -223,6 +238,12 @@ static inline int fsnotify_file_area_perm(struct file *file, int perm_mask,
return 0;
}
+static inline int fsnotify_mmap_perm(struct file *file, int prot,
+ const loff_t off, size_t len)
+{
+ return 0;
+}
+
static inline int fsnotify_truncate_perm(const struct path *path, loff_t length)
{
return 0;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f34f4cfaa513..291d49b9bf05 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -267,6 +267,7 @@ struct kvm_gfn_range {
union kvm_mmu_notifier_arg arg;
enum kvm_gfn_range_filter attr_filter;
bool may_block;
+ bool lockless;
};
bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
@@ -1746,7 +1747,6 @@ static inline void kvm_unregister_perf_callbacks(void) {}
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type);
void kvm_arch_destroy_vm(struct kvm *kvm);
-void kvm_arch_sync_events(struct kvm *kvm);
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
@@ -2382,7 +2382,7 @@ static inline bool kvm_is_visible_memslot(struct kvm_memory_slot *memslot)
struct kvm_vcpu *kvm_get_running_vcpu(void);
struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
-#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+#if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS)
bool kvm_arch_has_irq_bypass(void);
int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *,
struct irq_bypass_producer *);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7b1068ddcbb7..8483e09aeb2c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3420,7 +3420,6 @@ extern vm_fault_t filemap_fault(struct vm_fault *vmf);
extern vm_fault_t filemap_map_pages(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff);
extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf);
-extern vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf);
extern unsigned long stack_guard_gap;
/* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index f756fac95488..6281063cbd8e 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -804,6 +804,7 @@ struct hci_conn_params {
extern struct list_head hci_dev_list;
extern struct list_head hci_cb_list;
extern rwlock_t hci_dev_list_lock;
+extern struct mutex hci_cb_list_lock;
#define hci_dev_set_flag(hdev, nr) set_bit((nr), (hdev)->dev_flags)
#define hci_dev_clear_flag(hdev, nr) clear_bit((nr), (hdev)->dev_flags)
@@ -2010,47 +2011,24 @@ struct hci_cb {
char *name;
- bool (*match) (struct hci_conn *conn);
void (*connect_cfm) (struct hci_conn *conn, __u8 status);
void (*disconn_cfm) (struct hci_conn *conn, __u8 status);
void (*security_cfm) (struct hci_conn *conn, __u8 status,
- __u8 encrypt);
+ __u8 encrypt);
void (*key_change_cfm) (struct hci_conn *conn, __u8 status);
void (*role_switch_cfm) (struct hci_conn *conn, __u8 status, __u8 role);
};
-static inline void hci_cb_lookup(struct hci_conn *conn, struct list_head *list)
-{
- struct hci_cb *cb, *cpy;
-
- rcu_read_lock();
- list_for_each_entry_rcu(cb, &hci_cb_list, list) {
- if (cb->match && cb->match(conn)) {
- cpy = kmalloc(sizeof(*cpy), GFP_ATOMIC);
- if (!cpy)
- break;
-
- *cpy = *cb;
- INIT_LIST_HEAD(&cpy->list);
- list_add_rcu(&cpy->list, list);
- }
- }
- rcu_read_unlock();
-}
-
static inline void hci_connect_cfm(struct hci_conn *conn, __u8 status)
{
- struct list_head list;
- struct hci_cb *cb, *tmp;
-
- INIT_LIST_HEAD(&list);
- hci_cb_lookup(conn, &list);
+ struct hci_cb *cb;
- list_for_each_entry_safe(cb, tmp, &list, list) {
+ mutex_lock(&hci_cb_list_lock);
+ list_for_each_entry(cb, &hci_cb_list, list) {
if (cb->connect_cfm)
cb->connect_cfm(conn, status);
- kfree(cb);
}
+ mutex_unlock(&hci_cb_list_lock);
if (conn->connect_cfm_cb)
conn->connect_cfm_cb(conn, status);
@@ -2058,43 +2036,22 @@ static inline void hci_connect_cfm(struct hci_conn *conn, __u8 status)
static inline void hci_disconn_cfm(struct hci_conn *conn, __u8 reason)
{
- struct list_head list;
- struct hci_cb *cb, *tmp;
-
- INIT_LIST_HEAD(&list);
- hci_cb_lookup(conn, &list);
+ struct hci_cb *cb;
- list_for_each_entry_safe(cb, tmp, &list, list) {
+ mutex_lock(&hci_cb_list_lock);
+ list_for_each_entry(cb, &hci_cb_list, list) {
if (cb->disconn_cfm)
cb->disconn_cfm(conn, reason);
- kfree(cb);
}
+ mutex_unlock(&hci_cb_list_lock);
if (conn->disconn_cfm_cb)
conn->disconn_cfm_cb(conn, reason);
}
-static inline void hci_security_cfm(struct hci_conn *conn, __u8 status,
- __u8 encrypt)
-{
- struct list_head list;
- struct hci_cb *cb, *tmp;
-
- INIT_LIST_HEAD(&list);
- hci_cb_lookup(conn, &list);
-
- list_for_each_entry_safe(cb, tmp, &list, list) {
- if (cb->security_cfm)
- cb->security_cfm(conn, status, encrypt);
- kfree(cb);
- }
-
- if (conn->security_cfm_cb)
- conn->security_cfm_cb(conn, status);
-}
-
static inline void hci_auth_cfm(struct hci_conn *conn, __u8 status)
{
+ struct hci_cb *cb;
__u8 encrypt;
if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags))
@@ -2102,11 +2059,20 @@ static inline void hci_auth_cfm(struct hci_conn *conn, __u8 status)
encrypt = test_bit(HCI_CONN_ENCRYPT, &conn->flags) ? 0x01 : 0x00;
- hci_security_cfm(conn, status, encrypt);
+ mutex_lock(&hci_cb_list_lock);
+ list_for_each_entry(cb, &hci_cb_list, list) {
+ if (cb->security_cfm)
+ cb->security_cfm(conn, status, encrypt);
+ }
+ mutex_unlock(&hci_cb_list_lock);
+
+ if (conn->security_cfm_cb)
+ conn->security_cfm_cb(conn, status);
}
static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status)
{
+ struct hci_cb *cb;
__u8 encrypt;
if (conn->state == BT_CONFIG) {
@@ -2133,38 +2099,40 @@ static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status)
conn->sec_level = conn->pending_sec_level;
}
- hci_security_cfm(conn, status, encrypt);
+ mutex_lock(&hci_cb_list_lock);
+ list_for_each_entry(cb, &hci_cb_list, list) {
+ if (cb->security_cfm)
+ cb->security_cfm(conn, status, encrypt);
+ }
+ mutex_unlock(&hci_cb_list_lock);
+
+ if (conn->security_cfm_cb)
+ conn->security_cfm_cb(conn, status);
}
static inline void hci_key_change_cfm(struct hci_conn *conn, __u8 status)
{
- struct list_head list;
- struct hci_cb *cb, *tmp;
-
- INIT_LIST_HEAD(&list);
- hci_cb_lookup(conn, &list);
+ struct hci_cb *cb;
- list_for_each_entry_safe(cb, tmp, &list, list) {
+ mutex_lock(&hci_cb_list_lock);
+ list_for_each_entry(cb, &hci_cb_list, list) {
if (cb->key_change_cfm)
cb->key_change_cfm(conn, status);
- kfree(cb);
}
+ mutex_unlock(&hci_cb_list_lock);
}
static inline void hci_role_switch_cfm(struct hci_conn *conn, __u8 status,
__u8 role)
{
- struct list_head list;
- struct hci_cb *cb, *tmp;
-
- INIT_LIST_HEAD(&list);
- hci_cb_lookup(conn, &list);
+ struct hci_cb *cb;
- list_for_each_entry_safe(cb, tmp, &list, list) {
+ mutex_lock(&hci_cb_list_lock);
+ list_for_each_entry(cb, &hci_cb_list, list) {
if (cb->role_switch_cfm)
cb->role_switch_cfm(conn, status, role);
- kfree(cb);
}
+ mutex_unlock(&hci_cb_list_lock);
}
static inline bool hci_bdaddr_is_rpa(bdaddr_t *bdaddr, u8 addr_type)
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 60d5dcdb289c..803d5f1601f9 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1891,7 +1891,7 @@ void nft_chain_filter_fini(void);
void __init nft_chain_route_init(void);
void nft_chain_route_fini(void);
-void nf_tables_trans_destroy_flush_work(void);
+void nf_tables_trans_destroy_flush_work(struct net *net);
int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result);
__be64 nf_jiffies64_to_msecs(u64 input);
@@ -1905,6 +1905,7 @@ static inline int nft_request_module(struct net *net, const char *fmt, ...) { re
struct nftables_pernet {
struct list_head tables;
struct list_head commit_list;
+ struct list_head destroy_list;
struct list_head commit_set_list;
struct list_head binding_list;
struct list_head module_list;
@@ -1915,6 +1916,7 @@ struct nftables_pernet {
unsigned int base_seq;
unsigned int gc_seq;
u8 validate_state;
+ struct work_struct destroy_work;
};
extern unsigned int nf_tables_net_id;
diff --git a/include/sound/soc.h b/include/sound/soc.h
index fcdb5adfcd5e..b3e84bc47c6f 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -1261,7 +1261,10 @@ void snd_soc_close_delayed_work(struct snd_soc_pcm_runtime *rtd);
/* mixer control */
struct soc_mixer_control {
- int min, max, platform_max;
+ /* Minimum and maximum specified as written to the hardware */
+ int min, max;
+ /* Limited maximum value specified as presented through the control */
+ int platform_max;
int reg, rreg;
unsigned int shift, rshift;
unsigned int sign_bit;
diff --git a/init/Kconfig b/init/Kconfig
index d0d021b3fa3b..324c2886b2ea 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1973,7 +1973,7 @@ config RUST
depends on !MODVERSIONS || GENDWARFKSYMS
depends on !GCC_PLUGIN_RANDSTRUCT
depends on !RANDSTRUCT
- depends on !DEBUG_INFO_BTF || PAHOLE_HAS_LANG_EXCLUDE
+ depends on !DEBUG_INFO_BTF || (PAHOLE_HAS_LANG_EXCLUDE && !LTO)
depends on !CFI_CLANG || HAVE_CFI_ICALL_NORMALIZE_INTEGERS_RUSTC
select CFI_ICALL_NORMALIZE_INTEGERS if CFI_CLANG
depends on !CALL_PADDING || RUSTC_VERSION >= 108100
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
index c38a2d2d4a7e..78dd3d8c6554 100644
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -59,8 +59,8 @@ struct rt_mutex_waiter {
};
/**
- * rt_wake_q_head - Wrapper around regular wake_q_head to support
- * "sleeping" spinlocks on RT
+ * struct rt_wake_q_head - Wrapper around regular wake_q_head to support
+ * "sleeping" spinlocks on RT
* @head: The regular wake_q_head for sleeping lock variants
* @rtlock_task: Task pointer for RT lock (spin/rwlock) wakeups
*/
diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c
index 34bfae72f295..de9117c0e671 100644
--- a/kernel/locking/semaphore.c
+++ b/kernel/locking/semaphore.c
@@ -29,6 +29,7 @@
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
+#include <linux/sched/wake_q.h>
#include <linux/semaphore.h>
#include <linux/spinlock.h>
#include <linux/ftrace.h>
@@ -38,7 +39,7 @@ static noinline void __down(struct semaphore *sem);
static noinline int __down_interruptible(struct semaphore *sem);
static noinline int __down_killable(struct semaphore *sem);
static noinline int __down_timeout(struct semaphore *sem, long timeout);
-static noinline void __up(struct semaphore *sem);
+static noinline void __up(struct semaphore *sem, struct wake_q_head *wake_q);
/**
* down - acquire the semaphore
@@ -183,13 +184,16 @@ EXPORT_SYMBOL(down_timeout);
void __sched up(struct semaphore *sem)
{
unsigned long flags;
+ DEFINE_WAKE_Q(wake_q);
raw_spin_lock_irqsave(&sem->lock, flags);
if (likely(list_empty(&sem->wait_list)))
sem->count++;
else
- __up(sem);
+ __up(sem, &wake_q);
raw_spin_unlock_irqrestore(&sem->lock, flags);
+ if (!wake_q_empty(&wake_q))
+ wake_up_q(&wake_q);
}
EXPORT_SYMBOL(up);
@@ -269,11 +273,12 @@ static noinline int __sched __down_timeout(struct semaphore *sem, long timeout)
return __down_common(sem, TASK_UNINTERRUPTIBLE, timeout);
}
-static noinline void __sched __up(struct semaphore *sem)
+static noinline void __sched __up(struct semaphore *sem,
+ struct wake_q_head *wake_q)
{
struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list,
struct semaphore_waiter, list);
list_del(&waiter->list);
waiter->up = true;
- wake_up_process(waiter->task);
+ wake_q_add(wake_q, waiter->task);
}
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 5d9143dd0879..6dab4854c6c0 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -9,8 +9,6 @@
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
-DEFINE_STATIC_KEY_FALSE(sched_clock_irqtime);
-
/*
* There are no locks covering percpu hardirq/softirq time.
* They are only modified in vtime_account, on corresponding CPU
@@ -24,14 +22,16 @@ DEFINE_STATIC_KEY_FALSE(sched_clock_irqtime);
*/
DEFINE_PER_CPU(struct irqtime, cpu_irqtime);
+int sched_clock_irqtime;
+
void enable_sched_clock_irqtime(void)
{
- static_branch_enable(&sched_clock_irqtime);
+ sched_clock_irqtime = 1;
}
void disable_sched_clock_irqtime(void)
{
- static_branch_disable(&sched_clock_irqtime);
+ sched_clock_irqtime = 0;
}
static void irqtime_account_delta(struct irqtime *irqtime, u64 delta,
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 0f1da199cfc7..7b9dfee858e7 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -6422,6 +6422,9 @@ static bool check_builtin_idle_enabled(void)
__bpf_kfunc s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,
u64 wake_flags, bool *is_idle)
{
+ if (!ops_cpu_valid(prev_cpu, NULL))
+ goto prev_cpu;
+
if (!check_builtin_idle_enabled())
goto prev_cpu;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c8512a9fb022..023b844159c9 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -3259,11 +3259,11 @@ struct irqtime {
};
DECLARE_PER_CPU(struct irqtime, cpu_irqtime);
-DECLARE_STATIC_KEY_FALSE(sched_clock_irqtime);
+extern int sched_clock_irqtime;
static inline int irqtime_enabled(void)
{
- return static_branch_likely(&sched_clock_irqtime);
+ return sched_clock_irqtime;
}
/*
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index ad7419e24055..53dc6719181e 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -5689,12 +5689,16 @@ static int event_hist_open(struct inode *inode, struct file *file)
guard(mutex)(&event_mutex);
event_file = event_file_data(file);
- if (!event_file)
- return -ENODEV;
+ if (!event_file) {
+ ret = -ENODEV;
+ goto err;
+ }
hist_file = kzalloc(sizeof(*hist_file), GFP_KERNEL);
- if (!hist_file)
- return -ENOMEM;
+ if (!hist_file) {
+ ret = -ENOMEM;
+ goto err;
+ }
hist_file->file = file;
hist_file->last_act = get_hist_hit_count(event_file);
@@ -5702,9 +5706,14 @@ static int event_hist_open(struct inode *inode, struct file *file)
/* Clear private_data to avoid warning in single_open() */
file->private_data = NULL;
ret = single_open(file, hist_show, hist_file);
- if (ret)
+ if (ret) {
kfree(hist_file);
+ goto err;
+ }
+ return 0;
+err:
+ tracing_release_file_tr(inode, file);
return ret;
}
@@ -5979,7 +5988,10 @@ static int event_hist_debug_open(struct inode *inode, struct file *file)
/* Clear private_data to avoid warning in single_open() */
file->private_data = NULL;
- return single_open(file, hist_debug_show, file);
+ ret = single_open(file, hist_debug_show, file);
+ if (ret)
+ tracing_release_file_tr(inode, file);
+ return ret;
}
const struct file_operations event_hist_debug_fops = {
diff --git a/mm/filemap.c b/mm/filemap.c
index 2974691fdfad..6d616bb9001e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -47,7 +47,6 @@
#include <linux/splice.h>
#include <linux/rcupdate_wait.h>
#include <linux/sched/mm.h>
-#include <linux/fsnotify.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include "internal.h"
@@ -3198,14 +3197,6 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
unsigned long vm_flags = vmf->vma->vm_flags;
unsigned int mmap_miss;
- /*
- * If we have pre-content watches we need to disable readahead to make
- * sure that we don't populate our mapping with 0 filled pages that we
- * never emitted an event for.
- */
- if (unlikely(FMODE_FSNOTIFY_HSM(file->f_mode)))
- return fpin;
-
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/* Use the readahead code, even if readahead is disabled */
if ((vm_flags & VM_HUGEPAGE) && HPAGE_PMD_ORDER <= MAX_PAGECACHE_ORDER) {
@@ -3274,10 +3265,6 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
struct file *fpin = NULL;
unsigned int mmap_miss;
- /* See comment in do_sync_mmap_readahead. */
- if (unlikely(FMODE_FSNOTIFY_HSM(file->f_mode)))
- return fpin;
-
/* If we don't want any read-ahead, don't bother */
if (vmf->vma->vm_flags & VM_RAND_READ || !ra->ra_pages)
return fpin;
@@ -3337,48 +3324,6 @@ static vm_fault_t filemap_fault_recheck_pte_none(struct vm_fault *vmf)
}
/**
- * filemap_fsnotify_fault - maybe emit a pre-content event.
- * @vmf: struct vm_fault containing details of the fault.
- *
- * If we have a pre-content watch on this file we will emit an event for this
- * range. If we return anything the fault caller should return immediately, we
- * will return VM_FAULT_RETRY if we had to emit an event, which will trigger the
- * fault again and then the fault handler will run the second time through.
- *
- * Return: a bitwise-OR of %VM_FAULT_ codes, 0 if nothing happened.
- */
-vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf)
-{
- struct file *fpin = NULL;
- int mask = (vmf->flags & FAULT_FLAG_WRITE) ? MAY_WRITE : MAY_ACCESS;
- loff_t pos = vmf->pgoff >> PAGE_SHIFT;
- size_t count = PAGE_SIZE;
- int err;
-
- /*
- * We already did this and now we're retrying with everything locked,
- * don't emit the event and continue.
- */
- if (vmf->flags & FAULT_FLAG_TRIED)
- return 0;
-
- /* No watches, we're done. */
- if (likely(!FMODE_FSNOTIFY_HSM(vmf->vma->vm_file->f_mode)))
- return 0;
-
- fpin = maybe_unlock_mmap_for_io(vmf, fpin);
- if (!fpin)
- return VM_FAULT_SIGBUS;
-
- err = fsnotify_file_area_perm(fpin, mask, &pos, count);
- fput(fpin);
- if (err)
- return VM_FAULT_SIGBUS;
- return VM_FAULT_RETRY;
-}
-EXPORT_SYMBOL_GPL(filemap_fsnotify_fault);
-
-/**
* filemap_fault - read in file data for page fault handling
* @vmf: struct vm_fault containing details of the fault
*
@@ -3482,37 +3427,6 @@ retry_find:
*/
if (unlikely(!folio_test_uptodate(folio))) {
/*
- * If this is a precontent file we have can now emit an event to
- * try and populate the folio.
- */
- if (!(vmf->flags & FAULT_FLAG_TRIED) &&
- unlikely(FMODE_FSNOTIFY_HSM(file->f_mode))) {
- loff_t pos = folio_pos(folio);
- size_t count = folio_size(folio);
-
- /* We're NOWAIT, we have to retry. */
- if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT) {
- folio_unlock(folio);
- goto out_retry;
- }
-
- if (mapping_locked)
- filemap_invalidate_unlock_shared(mapping);
- mapping_locked = false;
-
- folio_unlock(folio);
- fpin = maybe_unlock_mmap_for_io(vmf, fpin);
- if (!fpin)
- goto out_retry;
-
- error = fsnotify_file_area_perm(fpin, MAY_ACCESS, &pos,
- count);
- if (error)
- ret = VM_FAULT_SIGBUS;
- goto out_retry;
- }
-
- /*
* If the invalidate lock is not held, the folio was in cache
* and uptodate and now it is not. Strange but possible since we
* didn't hold the page lock all the time. Let's drop
diff --git a/mm/memory.c b/mm/memory.c
index b9661ccfa64f..fb7b8dc75167 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -76,7 +76,6 @@
#include <linux/ptrace.h>
#include <linux/vmalloc.h>
#include <linux/sched/sysctl.h>
-#include <linux/fsnotify.h>
#include <trace/events/kmem.h>
@@ -5750,17 +5749,8 @@ out_map:
static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
-
if (vma_is_anonymous(vma))
return do_huge_pmd_anonymous_page(vmf);
- /*
- * Currently we just emit PAGE_SIZE for our fault events, so don't allow
- * a huge fault if we have a pre content watch on this file. This would
- * be trivial to support, but there would need to be tests to ensure
- * this works properly and those don't exist currently.
- */
- if (unlikely(FMODE_FSNOTIFY_HSM(vma->vm_file->f_mode)))
- return VM_FAULT_FALLBACK;
if (vma->vm_ops->huge_fault)
return vma->vm_ops->huge_fault(vmf, PMD_ORDER);
return VM_FAULT_FALLBACK;
@@ -5784,9 +5774,6 @@ static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf)
}
if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) {
- /* See comment in create_huge_pmd. */
- if (unlikely(FMODE_FSNOTIFY_HSM(vma->vm_file->f_mode)))
- goto split;
if (vma->vm_ops->huge_fault) {
ret = vma->vm_ops->huge_fault(vmf, PMD_ORDER);
if (!(ret & VM_FAULT_FALLBACK))
@@ -5809,9 +5796,6 @@ static vm_fault_t create_huge_pud(struct vm_fault *vmf)
/* No support for anonymous transparent PUD pages yet */
if (vma_is_anonymous(vma))
return VM_FAULT_FALLBACK;
- /* See comment in create_huge_pmd. */
- if (unlikely(FMODE_FSNOTIFY_HSM(vma->vm_file->f_mode)))
- return VM_FAULT_FALLBACK;
if (vma->vm_ops->huge_fault)
return vma->vm_ops->huge_fault(vmf, PUD_ORDER);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
@@ -5829,9 +5813,6 @@ static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
if (vma_is_anonymous(vma))
goto split;
if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) {
- /* See comment in create_huge_pmd. */
- if (unlikely(FMODE_FSNOTIFY_HSM(vma->vm_file->f_mode)))
- goto split;
if (vma->vm_ops->huge_fault) {
ret = vma->vm_ops->huge_fault(vmf, PUD_ORDER);
if (!(ret & VM_FAULT_FALLBACK))
diff --git a/mm/nommu.c b/mm/nommu.c
index baa79abdaf03..9cb6e99215e2 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1613,13 +1613,6 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
}
EXPORT_SYMBOL(remap_vmalloc_range);
-vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf)
-{
- BUG();
- return 0;
-}
-EXPORT_SYMBOL_GPL(filemap_fsnotify_fault);
-
vm_fault_t filemap_fault(struct vm_fault *vmf)
{
BUG();
diff --git a/mm/readahead.c b/mm/readahead.c
index 220155a5c964..6a4e96b69702 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -128,7 +128,6 @@
#include <linux/blk-cgroup.h>
#include <linux/fadvise.h>
#include <linux/sched/mm.h>
-#include <linux/fsnotify.h>
#include "internal.h"
@@ -559,15 +558,6 @@ void page_cache_sync_ra(struct readahead_control *ractl,
pgoff_t prev_index, miss;
/*
- * If we have pre-content watches we need to disable readahead to make
- * sure that we don't find 0 filled pages in cache that we never emitted
- * events for. Filesystems supporting HSM must make sure to not call
- * this function with ractl->file unset for files handled by HSM.
- */
- if (ractl->file && unlikely(FMODE_FSNOTIFY_HSM(ractl->file->f_mode)))
- return;
-
- /*
* Even if readahead is disabled, issue this request as readahead
* as we'll need it to satisfy the requested range. The forced
* readahead will do the right thing and limit the read to just the
@@ -645,10 +635,6 @@ void page_cache_async_ra(struct readahead_control *ractl,
if (!ra->ra_pages)
return;
- /* See the comment in page_cache_sync_ra. */
- if (ractl->file && unlikely(FMODE_FSNOTIFY_HSM(ractl->file->f_mode)))
- return;
-
/*
* Same bit is used for PG_readahead and PG_reclaim.
*/
diff --git a/mm/util.c b/mm/util.c
index b6b9684a1438..8c965474d329 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -23,6 +23,7 @@
#include <linux/processor.h>
#include <linux/sizes.h>
#include <linux/compat.h>
+#include <linux/fsnotify.h>
#include <linux/uaccess.h>
@@ -569,6 +570,8 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
LIST_HEAD(uf);
ret = security_mmap_file(file, prot, flag);
+ if (!ret)
+ ret = fsnotify_mmap_perm(file, prot, pgoff >> PAGE_SHIFT, len);
if (!ret) {
if (mmap_write_lock_killable(mm))
return -EINTR;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index e7ec12437c8b..012fc107901a 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -57,6 +57,7 @@ DEFINE_RWLOCK(hci_dev_list_lock);
/* HCI callback list */
LIST_HEAD(hci_cb_list);
+DEFINE_MUTEX(hci_cb_list_lock);
/* HCI ID Numbering */
static DEFINE_IDA(hci_index_ida);
@@ -2972,7 +2973,9 @@ int hci_register_cb(struct hci_cb *cb)
{
BT_DBG("%p name %s", cb, cb->name);
- list_add_tail_rcu(&cb->list, &hci_cb_list);
+ mutex_lock(&hci_cb_list_lock);
+ list_add_tail(&cb->list, &hci_cb_list);
+ mutex_unlock(&hci_cb_list_lock);
return 0;
}
@@ -2982,8 +2985,9 @@ int hci_unregister_cb(struct hci_cb *cb)
{
BT_DBG("%p name %s", cb, cb->name);
- list_del_rcu(&cb->list);
- synchronize_rcu();
+ mutex_lock(&hci_cb_list_lock);
+ list_del(&cb->list);
+ mutex_unlock(&hci_cb_list_lock);
return 0;
}
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 2cc7a9306350..903b0b52692a 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -3391,23 +3391,30 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, void *data,
hci_update_scan(hdev);
}
- params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type);
- if (params) {
- switch (params->auto_connect) {
- case HCI_AUTO_CONN_LINK_LOSS:
- if (ev->reason != HCI_ERROR_CONNECTION_TIMEOUT)
+ /* Re-enable passive scanning if disconnected device is marked
+ * as auto-connectable.
+ */
+ if (conn->type == LE_LINK) {
+ params = hci_conn_params_lookup(hdev, &conn->dst,
+ conn->dst_type);
+ if (params) {
+ switch (params->auto_connect) {
+ case HCI_AUTO_CONN_LINK_LOSS:
+ if (ev->reason != HCI_ERROR_CONNECTION_TIMEOUT)
+ break;
+ fallthrough;
+
+ case HCI_AUTO_CONN_DIRECT:
+ case HCI_AUTO_CONN_ALWAYS:
+ hci_pend_le_list_del_init(params);
+ hci_pend_le_list_add(params,
+ &hdev->pend_le_conns);
+ hci_update_passive_scan(hdev);
break;
- fallthrough;
- case HCI_AUTO_CONN_DIRECT:
- case HCI_AUTO_CONN_ALWAYS:
- hci_pend_le_list_del_init(params);
- hci_pend_le_list_add(params, &hdev->pend_le_conns);
- hci_update_passive_scan(hdev);
- break;
-
- default:
- break;
+ default:
+ break;
+ }
}
}
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index 44acddf58a0c..0cb52a3308ba 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -2187,11 +2187,6 @@ done:
return HCI_LM_ACCEPT;
}
-static bool iso_match(struct hci_conn *hcon)
-{
- return hcon->type == ISO_LINK || hcon->type == LE_LINK;
-}
-
static void iso_connect_cfm(struct hci_conn *hcon, __u8 status)
{
if (hcon->type != ISO_LINK) {
@@ -2373,7 +2368,6 @@ drop:
static struct hci_cb iso_cb = {
.name = "ISO",
- .match = iso_match,
.connect_cfm = iso_connect_cfm,
.disconn_cfm = iso_disconn_cfm,
};
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index b22078b67972..c27ea70f71e1 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -7182,11 +7182,6 @@ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c,
return NULL;
}
-static bool l2cap_match(struct hci_conn *hcon)
-{
- return hcon->type == ACL_LINK || hcon->type == LE_LINK;
-}
-
static void l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
{
struct hci_dev *hdev = hcon->hdev;
@@ -7194,6 +7189,9 @@ static void l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
struct l2cap_chan *pchan;
u8 dst_type;
+ if (hcon->type != ACL_LINK && hcon->type != LE_LINK)
+ return;
+
BT_DBG("hcon %p bdaddr %pMR status %d", hcon, &hcon->dst, status);
if (status) {
@@ -7258,6 +7256,9 @@ int l2cap_disconn_ind(struct hci_conn *hcon)
static void l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason)
{
+ if (hcon->type != ACL_LINK && hcon->type != LE_LINK)
+ return;
+
BT_DBG("hcon %p reason %d", hcon, reason);
l2cap_conn_del(hcon, bt_to_errno(reason));
@@ -7565,7 +7566,6 @@ unlock:
static struct hci_cb l2cap_cb = {
.name = "L2CAP",
- .match = l2cap_match,
.connect_cfm = l2cap_connect_cfm,
.disconn_cfm = l2cap_disconn_cfm,
.security_cfm = l2cap_security_cfm,
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 4c56ca5a216c..ad5177e3a69b 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -2134,11 +2134,6 @@ static int rfcomm_run(void *unused)
return 0;
}
-static bool rfcomm_match(struct hci_conn *hcon)
-{
- return hcon->type == ACL_LINK;
-}
-
static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt)
{
struct rfcomm_session *s;
@@ -2185,7 +2180,6 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt)
static struct hci_cb rfcomm_cb = {
.name = "RFCOMM",
- .match = rfcomm_match,
.security_cfm = rfcomm_security_cfm
};
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index aa7bfe26cb40..5d1bc0d6aee0 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -107,6 +107,14 @@ static void sco_conn_put(struct sco_conn *conn)
kref_put(&conn->ref, sco_conn_free);
}
+static struct sco_conn *sco_conn_hold(struct sco_conn *conn)
+{
+ BT_DBG("conn %p refcnt %u", conn, kref_read(&conn->ref));
+
+ kref_get(&conn->ref);
+ return conn;
+}
+
static struct sco_conn *sco_conn_hold_unless_zero(struct sco_conn *conn)
{
if (!conn)
@@ -1353,6 +1361,7 @@ static void sco_conn_ready(struct sco_conn *conn)
bacpy(&sco_pi(sk)->src, &conn->hcon->src);
bacpy(&sco_pi(sk)->dst, &conn->hcon->dst);
+ sco_conn_hold(conn);
hci_conn_hold(conn->hcon);
__sco_chan_add(conn, sk, parent);
@@ -1398,27 +1407,30 @@ int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
return lm;
}
-static bool sco_match(struct hci_conn *hcon)
-{
- return hcon->type == SCO_LINK || hcon->type == ESCO_LINK;
-}
-
static void sco_connect_cfm(struct hci_conn *hcon, __u8 status)
{
+ if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK)
+ return;
+
BT_DBG("hcon %p bdaddr %pMR status %u", hcon, &hcon->dst, status);
if (!status) {
struct sco_conn *conn;
conn = sco_conn_add(hcon);
- if (conn)
+ if (conn) {
sco_conn_ready(conn);
+ sco_conn_put(conn);
+ }
} else
sco_conn_del(hcon, bt_to_errno(status));
}
static void sco_disconn_cfm(struct hci_conn *hcon, __u8 reason)
{
+ if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK)
+ return;
+
BT_DBG("hcon %p reason %d", hcon, reason);
sco_conn_del(hcon, bt_to_errno(reason));
@@ -1444,7 +1456,6 @@ drop:
static struct hci_cb sco_cb = {
.name = "SCO",
- .match = sco_match,
.connect_cfm = sco_connect_cfm,
.disconn_cfm = sco_disconn_cfm,
};
diff --git a/net/core/dev.c b/net/core/dev.c
index 30da277c5a6f..2f7f5fd9ffec 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3872,6 +3872,9 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
{
netdev_features_t features;
+ if (!skb_frags_readable(skb))
+ goto out_kfree_skb;
+
features = netif_skb_features(skb);
skb = validate_xmit_vlan(skb, features);
if (unlikely(!skb))
diff --git a/net/core/devmem.c b/net/core/devmem.c
index 3bba3f018df0..0e5a2c672efd 100644
--- a/net/core/devmem.c
+++ b/net/core/devmem.c
@@ -109,6 +109,7 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding)
struct netdev_rx_queue *rxq;
unsigned long xa_idx;
unsigned int rxq_idx;
+ int err;
if (binding->list.next)
list_del(&binding->list);
@@ -120,7 +121,8 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding)
rxq_idx = get_netdev_rx_queue_index(rxq);
- WARN_ON(netdev_rx_queue_restart(binding->dev, rxq_idx));
+ err = netdev_rx_queue_restart(binding->dev, rxq_idx);
+ WARN_ON(err && err != -ENETDOWN);
}
xa_erase(&net_devmem_dmabuf_bindings, binding->id);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 62b4041aae1a..0ab722d95a2d 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -319,6 +319,7 @@ static int netpoll_owner_active(struct net_device *dev)
static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
{
netdev_tx_t status = NETDEV_TX_BUSY;
+ netdev_tx_t ret = NET_XMIT_DROP;
struct net_device *dev;
unsigned long tries;
/* It is up to the caller to keep npinfo alive. */
@@ -327,11 +328,12 @@ static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
lockdep_assert_irqs_disabled();
dev = np->dev;
+ rcu_read_lock();
npinfo = rcu_dereference_bh(dev->npinfo);
if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
dev_kfree_skb_irq(skb);
- return NET_XMIT_DROP;
+ goto out;
}
/* don't get messages out of order, and no recursion */
@@ -370,7 +372,10 @@ static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
skb_queue_tail(&npinfo->txq, skb);
schedule_delayed_work(&npinfo->tx_work,0);
}
- return NETDEV_TX_OK;
+ ret = NETDEV_TX_OK;
+out:
+ rcu_read_unlock();
+ return ret;
}
netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
diff --git a/net/ethtool/tsinfo.c b/net/ethtool/tsinfo.c
index 691be6c445b3..ad3866c5a902 100644
--- a/net/ethtool/tsinfo.c
+++ b/net/ethtool/tsinfo.c
@@ -290,7 +290,8 @@ static void *ethnl_tsinfo_prepare_dump(struct sk_buff *skb,
reply_data = ctx->reply_data;
memset(reply_data, 0, sizeof(*reply_data));
reply_data->base.dev = dev;
- memset(&reply_data->ts_info, 0, sizeof(reply_data->ts_info));
+ reply_data->ts_info.cmd = ETHTOOL_GET_TS_INFO;
+ reply_data->ts_info.phc_index = -1;
return ehdr;
}
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index ac8cc1076536..8b6258819dad 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3209,16 +3209,13 @@ static void add_v4_addrs(struct inet6_dev *idev)
struct in6_addr addr;
struct net_device *dev;
struct net *net = dev_net(idev->dev);
- int scope, plen, offset = 0;
+ int scope, plen;
u32 pflags = 0;
ASSERT_RTNL();
memset(&addr, 0, sizeof(struct in6_addr));
- /* in case of IP6GRE the dev_addr is an IPv6 and therefore we use only the last 4 bytes */
- if (idev->dev->addr_len == sizeof(struct in6_addr))
- offset = sizeof(struct in6_addr) - 4;
- memcpy(&addr.s6_addr32[3], idev->dev->dev_addr + offset, 4);
+ memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4);
if (!(idev->dev->flags & IFF_POINTOPOINT) && idev->dev->type == ARPHRD_SIT) {
scope = IPV6_ADDR_COMPATv4;
@@ -3529,7 +3526,13 @@ static void addrconf_gre_config(struct net_device *dev)
return;
}
- if (dev->type == ARPHRD_ETHER) {
+ /* Generate the IPv6 link-local address using addrconf_addr_gen(),
+ * unless we have an IPv4 GRE device not bound to an IP address and
+ * which is in EUI64 mode (as __ipv6_isatap_ifid() would fail in this
+ * case). Such devices fall back to add_v4_addrs() instead.
+ */
+ if (!(dev->type == ARPHRD_IPGRE && *(__be32 *)dev->dev_addr == 0 &&
+ idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64)) {
addrconf_addr_gen(idev, true);
return;
}
diff --git a/net/mac80211/eht.c b/net/mac80211/eht.c
index 7a3116c36df9..fd41046e3b68 100644
--- a/net/mac80211/eht.c
+++ b/net/mac80211/eht.c
@@ -2,7 +2,7 @@
/*
* EHT handling
*
- * Copyright(c) 2021-2024 Intel Corporation
+ * Copyright(c) 2021-2025 Intel Corporation
*/
#include "ieee80211_i.h"
@@ -76,6 +76,13 @@ ieee80211_eht_cap_ie_to_sta_eht_cap(struct ieee80211_sub_if_data *sdata,
link_sta->cur_max_bandwidth = ieee80211_sta_cap_rx_bw(link_sta);
link_sta->pub->bandwidth = ieee80211_sta_cur_vht_bw(link_sta);
+ /*
+ * The MPDU length bits are reserved on all but 2.4 GHz and get set via
+ * VHT (5 GHz) or HE (6 GHz) capabilities.
+ */
+ if (sband->band != NL80211_BAND_2GHZ)
+ return;
+
switch (u8_get_bits(eht_cap->eht_cap_elem.mac_cap_info[0],
IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_MASK)) {
case IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_11454:
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 1e28efe4203c..0659ec892ec6 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -6,7 +6,7 @@
* Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright(c) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2024 Intel Corporation
+ * Copyright (C) 2018-2025 Intel Corporation
*/
#include <linux/jiffies.h>
@@ -3329,8 +3329,8 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata,
return;
}
- if (!ether_addr_equal(mgmt->sa, sdata->deflink.u.mgd.bssid) ||
- !ether_addr_equal(mgmt->bssid, sdata->deflink.u.mgd.bssid)) {
+ if (!ether_addr_equal(mgmt->sa, sdata->vif.cfg.ap_addr) ||
+ !ether_addr_equal(mgmt->bssid, sdata->vif.cfg.ap_addr)) {
/* Not from the current AP or not associated yet. */
return;
}
@@ -3346,9 +3346,9 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata,
skb_reserve(skb, local->hw.extra_tx_headroom);
resp = skb_put_zero(skb, 24);
- memcpy(resp->da, mgmt->sa, ETH_ALEN);
+ memcpy(resp->da, sdata->vif.cfg.ap_addr, ETH_ALEN);
memcpy(resp->sa, sdata->vif.addr, ETH_ALEN);
- memcpy(resp->bssid, sdata->deflink.u.mgd.bssid, ETH_ALEN);
+ memcpy(resp->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN);
resp->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
IEEE80211_STYPE_ACTION);
skb_put(skb, 1 + sizeof(resp->u.action.u.sa_query));
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index f83268fa9f92..caa3d0236b5e 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -4,7 +4,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#include <linux/module.h>
@@ -1335,9 +1335,13 @@ static int _sta_info_move_state(struct sta_info *sta,
sta->sta.addr, new_state);
/* notify the driver before the actual changes so it can
- * fail the transition
+ * fail the transition if the state is increasing.
+ * The driver is required not to fail when the transition
+ * is decreasing the state, so first, do all the preparation
+ * work and only then, notify the driver.
*/
- if (test_sta_flag(sta, WLAN_STA_INSERTED)) {
+ if (new_state > sta->sta_state &&
+ test_sta_flag(sta, WLAN_STA_INSERTED)) {
int err = drv_sta_state(sta->local, sta->sdata, sta,
sta->sta_state, new_state);
if (err)
@@ -1413,6 +1417,16 @@ static int _sta_info_move_state(struct sta_info *sta,
break;
}
+ if (new_state < sta->sta_state &&
+ test_sta_flag(sta, WLAN_STA_INSERTED)) {
+ int err = drv_sta_state(sta->local, sta->sdata, sta,
+ sta->sta_state, new_state);
+
+ WARN_ONCE(err,
+ "Driver is not allowed to fail if the sta_state is transitioning down the list: %d\n",
+ err);
+ }
+
sta->sta_state = new_state;
return 0;
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 7f02bd5891eb..fdda14c08e2b 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -6,7 +6,7 @@
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2024 Intel Corporation
+ * Copyright (C) 2018-2025 Intel Corporation
*
* utilities for mac80211
*/
@@ -2193,8 +2193,10 @@ int ieee80211_reconfig(struct ieee80211_local *local)
ieee80211_reconfig_roc(local);
/* Requeue all works */
- list_for_each_entry(sdata, &local->interfaces, list)
- wiphy_work_queue(local->hw.wiphy, &sdata->work);
+ list_for_each_entry(sdata, &local->interfaces, list) {
+ if (ieee80211_sdata_running(sdata))
+ wiphy_work_queue(local->hw.wiphy, &sdata->work);
+ }
}
ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP,
diff --git a/net/mctp/route.c b/net/mctp/route.c
index 3f2bd65ff5e3..4c460160914f 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -332,8 +332,14 @@ static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb)
& MCTP_HDR_SEQ_MASK;
if (!key->reasm_head) {
- key->reasm_head = skb;
- key->reasm_tailp = &(skb_shinfo(skb)->frag_list);
+ /* Since we're manipulating the shared frag_list, ensure it isn't
+ * shared with any other SKBs.
+ */
+ key->reasm_head = skb_unshare(skb, GFP_ATOMIC);
+ if (!key->reasm_head)
+ return -ENOMEM;
+
+ key->reasm_tailp = &(skb_shinfo(key->reasm_head)->frag_list);
key->last_seq = this_seq;
return 0;
}
diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c
index 17165b86ce22..06c1897b685a 100644
--- a/net/mctp/test/route-test.c
+++ b/net/mctp/test/route-test.c
@@ -921,6 +921,114 @@ static void mctp_test_route_input_sk_fail_frag(struct kunit *test)
__mctp_route_test_fini(test, dev, rt, sock);
}
+/* Input route to socket, using a fragmented message created from clones.
+ */
+static void mctp_test_route_input_cloned_frag(struct kunit *test)
+{
+ /* 5 packet fragments, forming 2 complete messages */
+ const struct mctp_hdr hdrs[5] = {
+ RX_FRAG(FL_S, 0),
+ RX_FRAG(0, 1),
+ RX_FRAG(FL_E, 2),
+ RX_FRAG(FL_S, 0),
+ RX_FRAG(FL_E, 1),
+ };
+ struct mctp_test_route *rt;
+ struct mctp_test_dev *dev;
+ struct sk_buff *skb[5];
+ struct sk_buff *rx_skb;
+ struct socket *sock;
+ size_t data_len;
+ u8 compare[100];
+ u8 flat[100];
+ size_t total;
+ void *p;
+ int rc;
+
+ /* Arbitrary length */
+ data_len = 3;
+ total = data_len + sizeof(struct mctp_hdr);
+
+ __mctp_route_test_init(test, &dev, &rt, &sock, MCTP_NET_ANY);
+
+ /* Create a single skb initially with concatenated packets */
+ skb[0] = mctp_test_create_skb(&hdrs[0], 5 * total);
+ mctp_test_skb_set_dev(skb[0], dev);
+ memset(skb[0]->data, 0 * 0x11, skb[0]->len);
+ memcpy(skb[0]->data, &hdrs[0], sizeof(struct mctp_hdr));
+
+ /* Extract and populate packets */
+ for (int i = 1; i < 5; i++) {
+ skb[i] = skb_clone(skb[i - 1], GFP_ATOMIC);
+ KUNIT_ASSERT_TRUE(test, skb[i]);
+ p = skb_pull(skb[i], total);
+ KUNIT_ASSERT_TRUE(test, p);
+ skb_reset_network_header(skb[i]);
+ memcpy(skb[i]->data, &hdrs[i], sizeof(struct mctp_hdr));
+ memset(&skb[i]->data[sizeof(struct mctp_hdr)], i * 0x11, data_len);
+ }
+ for (int i = 0; i < 5; i++)
+ skb_trim(skb[i], total);
+
+ /* SOM packets have a type byte to match the socket */
+ skb[0]->data[4] = 0;
+ skb[3]->data[4] = 0;
+
+ skb_dump("pkt1 ", skb[0], false);
+ skb_dump("pkt2 ", skb[1], false);
+ skb_dump("pkt3 ", skb[2], false);
+ skb_dump("pkt4 ", skb[3], false);
+ skb_dump("pkt5 ", skb[4], false);
+
+ for (int i = 0; i < 5; i++) {
+ KUNIT_EXPECT_EQ(test, refcount_read(&skb[i]->users), 1);
+ /* Take a reference so we can check refcounts at the end */
+ skb_get(skb[i]);
+ }
+
+ /* Feed the fragments into MCTP core */
+ for (int i = 0; i < 5; i++) {
+ rc = mctp_route_input(&rt->rt, skb[i]);
+ KUNIT_EXPECT_EQ(test, rc, 0);
+ }
+
+ /* Receive first reassembled message */
+ rx_skb = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc);
+ KUNIT_EXPECT_EQ(test, rc, 0);
+ KUNIT_EXPECT_EQ(test, rx_skb->len, 3 * data_len);
+ rc = skb_copy_bits(rx_skb, 0, flat, rx_skb->len);
+ for (int i = 0; i < rx_skb->len; i++)
+ compare[i] = (i / data_len) * 0x11;
+ /* Set type byte */
+ compare[0] = 0;
+
+ KUNIT_EXPECT_MEMEQ(test, flat, compare, rx_skb->len);
+ KUNIT_EXPECT_EQ(test, refcount_read(&rx_skb->users), 1);
+ kfree_skb(rx_skb);
+
+ /* Receive second reassembled message */
+ rx_skb = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc);
+ KUNIT_EXPECT_EQ(test, rc, 0);
+ KUNIT_EXPECT_EQ(test, rx_skb->len, 2 * data_len);
+ rc = skb_copy_bits(rx_skb, 0, flat, rx_skb->len);
+ for (int i = 0; i < rx_skb->len; i++)
+ compare[i] = (i / data_len + 3) * 0x11;
+ /* Set type byte */
+ compare[0] = 0;
+
+ KUNIT_EXPECT_MEMEQ(test, flat, compare, rx_skb->len);
+ KUNIT_EXPECT_EQ(test, refcount_read(&rx_skb->users), 1);
+ kfree_skb(rx_skb);
+
+ /* Check input skb refcounts */
+ for (int i = 0; i < 5; i++) {
+ KUNIT_EXPECT_EQ(test, refcount_read(&skb[i]->users), 1);
+ kfree_skb(skb[i]);
+ }
+
+ __mctp_route_test_fini(test, dev, rt, sock);
+}
+
#if IS_ENABLED(CONFIG_MCTP_FLOWS)
static void mctp_test_flow_init(struct kunit *test,
@@ -1144,6 +1252,7 @@ static struct kunit_case mctp_test_cases[] = {
KUNIT_CASE(mctp_test_packet_flow),
KUNIT_CASE(mctp_test_fragment_flow),
KUNIT_CASE(mctp_test_route_output_key_create),
+ KUNIT_CASE(mctp_test_route_input_cloned_frag),
{}
};
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 7d13110ce188..0633276d96bf 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3091,12 +3091,12 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
case IP_VS_SO_GET_SERVICES:
{
struct ip_vs_get_services *get;
- int size;
+ size_t size;
get = (struct ip_vs_get_services *)arg;
size = struct_size(get, entrytable, get->num_services);
if (*len != size) {
- pr_err("length: %u != %u\n", *len, size);
+ pr_err("length: %u != %zu\n", *len, size);
ret = -EINVAL;
goto out;
}
@@ -3132,12 +3132,12 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
case IP_VS_SO_GET_DESTS:
{
struct ip_vs_get_dests *get;
- int size;
+ size_t size;
get = (struct ip_vs_get_dests *)arg;
size = struct_size(get, entrytable, get->num_dests);
if (*len != size) {
- pr_err("length: %u != %u\n", *len, size);
+ pr_err("length: %u != %zu\n", *len, size);
ret = -EINVAL;
goto out;
}
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 4890af4dc263..913ede2f57f9 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -132,7 +132,7 @@ static int __nf_conncount_add(struct net *net,
struct nf_conn *found_ct;
unsigned int collect = 0;
- if (time_is_after_eq_jiffies((unsigned long)list->last_gc))
+ if ((u32)jiffies == list->last_gc)
goto add_new_node;
/* check the saved connections */
@@ -234,7 +234,7 @@ bool nf_conncount_gc_list(struct net *net,
bool ret = false;
/* don't bother if we just did GC */
- if (time_is_after_eq_jiffies((unsigned long)READ_ONCE(list->last_gc)))
+ if ((u32)jiffies == READ_ONCE(list->last_gc))
return false;
/* don't bother if other cpu is already doing GC */
@@ -377,6 +377,8 @@ restart:
conn->tuple = *tuple;
conn->zone = *zone;
+ conn->cpu = raw_smp_processor_id();
+ conn->jiffies32 = (u32)jiffies;
memcpy(rbconn->key, key, sizeof(u32) * data->keylen);
nf_conncount_list_init(&rbconn->list);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index a34de9c17cf1..c2df81b7e950 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -34,7 +34,6 @@ unsigned int nf_tables_net_id __read_mostly;
static LIST_HEAD(nf_tables_expressions);
static LIST_HEAD(nf_tables_objects);
static LIST_HEAD(nf_tables_flowtables);
-static LIST_HEAD(nf_tables_destroy_list);
static LIST_HEAD(nf_tables_gc_list);
static DEFINE_SPINLOCK(nf_tables_destroy_list_lock);
static DEFINE_SPINLOCK(nf_tables_gc_list_lock);
@@ -125,7 +124,6 @@ static void nft_validate_state_update(struct nft_table *table, u8 new_validate_s
table->validate_state = new_validate_state;
}
static void nf_tables_trans_destroy_work(struct work_struct *w);
-static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work);
static void nft_trans_gc_work(struct work_struct *work);
static DECLARE_WORK(trans_gc_work, nft_trans_gc_work);
@@ -10006,11 +10004,12 @@ static void nft_commit_release(struct nft_trans *trans)
static void nf_tables_trans_destroy_work(struct work_struct *w)
{
+ struct nftables_pernet *nft_net = container_of(w, struct nftables_pernet, destroy_work);
struct nft_trans *trans, *next;
LIST_HEAD(head);
spin_lock(&nf_tables_destroy_list_lock);
- list_splice_init(&nf_tables_destroy_list, &head);
+ list_splice_init(&nft_net->destroy_list, &head);
spin_unlock(&nf_tables_destroy_list_lock);
if (list_empty(&head))
@@ -10024,9 +10023,11 @@ static void nf_tables_trans_destroy_work(struct work_struct *w)
}
}
-void nf_tables_trans_destroy_flush_work(void)
+void nf_tables_trans_destroy_flush_work(struct net *net)
{
- flush_work(&trans_destroy_work);
+ struct nftables_pernet *nft_net = nft_pernet(net);
+
+ flush_work(&nft_net->destroy_work);
}
EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work);
@@ -10484,11 +10485,11 @@ static void nf_tables_commit_release(struct net *net)
trans->put_net = true;
spin_lock(&nf_tables_destroy_list_lock);
- list_splice_tail_init(&nft_net->commit_list, &nf_tables_destroy_list);
+ list_splice_tail_init(&nft_net->commit_list, &nft_net->destroy_list);
spin_unlock(&nf_tables_destroy_list_lock);
nf_tables_module_autoload_cleanup(net);
- schedule_work(&trans_destroy_work);
+ schedule_work(&nft_net->destroy_work);
mutex_unlock(&nft_net->commit_mutex);
}
@@ -11853,7 +11854,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
gc_seq = nft_gc_seq_begin(nft_net);
- nf_tables_trans_destroy_flush_work();
+ nf_tables_trans_destroy_flush_work(net);
again:
list_for_each_entry(table, &nft_net->tables, list) {
if (nft_table_has_owner(table) &&
@@ -11895,6 +11896,7 @@ static int __net_init nf_tables_init_net(struct net *net)
INIT_LIST_HEAD(&nft_net->tables);
INIT_LIST_HEAD(&nft_net->commit_list);
+ INIT_LIST_HEAD(&nft_net->destroy_list);
INIT_LIST_HEAD(&nft_net->commit_set_list);
INIT_LIST_HEAD(&nft_net->binding_list);
INIT_LIST_HEAD(&nft_net->module_list);
@@ -11903,6 +11905,7 @@ static int __net_init nf_tables_init_net(struct net *net)
nft_net->base_seq = 1;
nft_net->gc_seq = 0;
nft_net->validate_state = NFT_VALIDATE_SKIP;
+ INIT_WORK(&nft_net->destroy_work, nf_tables_trans_destroy_work);
return 0;
}
@@ -11931,14 +11934,17 @@ static void __net_exit nf_tables_exit_net(struct net *net)
if (!list_empty(&nft_net->module_list))
nf_tables_module_autoload_cleanup(net);
+ cancel_work_sync(&nft_net->destroy_work);
__nft_release_tables(net);
nft_gc_seq_end(nft_net, gc_seq);
mutex_unlock(&nft_net->commit_mutex);
+
WARN_ON_ONCE(!list_empty(&nft_net->tables));
WARN_ON_ONCE(!list_empty(&nft_net->module_list));
WARN_ON_ONCE(!list_empty(&nft_net->notify_list));
+ WARN_ON_ONCE(!list_empty(&nft_net->destroy_list));
}
static void nf_tables_exit_batch(struct list_head *net_exit_list)
@@ -12029,10 +12035,8 @@ static void __exit nf_tables_module_exit(void)
unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
nft_chain_filter_fini();
nft_chain_route_fini();
- nf_tables_trans_destroy_flush_work();
unregister_pernet_subsys(&nf_tables_net_ops);
cancel_work_sync(&trans_gc_work);
- cancel_work_sync(&trans_destroy_work);
rcu_barrier();
rhltable_destroy(&nft_objname_ht);
nf_tables_core_module_exit();
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 7ca4f0d21fe2..72711d62fddf 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -228,7 +228,7 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv)
return 0;
}
-static void nft_compat_wait_for_destructors(void)
+static void nft_compat_wait_for_destructors(struct net *net)
{
/* xtables matches or targets can have side effects, e.g.
* creation/destruction of /proc files.
@@ -236,7 +236,7 @@ static void nft_compat_wait_for_destructors(void)
* work queue. If we have pending invocations we thus
* need to wait for those to finish.
*/
- nf_tables_trans_destroy_flush_work();
+ nf_tables_trans_destroy_flush_work(net);
}
static int
@@ -262,7 +262,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv);
- nft_compat_wait_for_destructors();
+ nft_compat_wait_for_destructors(ctx->net);
ret = xt_check_target(&par, size, proto, inv);
if (ret < 0) {
@@ -515,7 +515,7 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv);
- nft_compat_wait_for_destructors();
+ nft_compat_wait_for_destructors(ctx->net);
return xt_check_match(&par, size, proto, inv);
}
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 2e59aba681a1..d526e69a2a2b 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -230,6 +230,7 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr,
enum ip_conntrack_info ctinfo;
u16 value = nft_reg_load16(&regs->data[priv->sreg]);
struct nf_conn *ct;
+ int oldcnt;
ct = nf_ct_get(skb, &ctinfo);
if (ct) /* already tracked */
@@ -250,10 +251,11 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr,
ct = this_cpu_read(nft_ct_pcpu_template);
- if (likely(refcount_read(&ct->ct_general.use) == 1)) {
- refcount_inc(&ct->ct_general.use);
+ __refcount_inc(&ct->ct_general.use, &oldcnt);
+ if (likely(oldcnt == 1)) {
nf_ct_zone_add(ct, &zone);
} else {
+ refcount_dec(&ct->ct_general.use);
/* previous skb got queued to userspace, allocate temporary
* one until percpu template can be reused.
*/
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index b8d03364566c..c74012c99125 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -85,7 +85,6 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb,
unsigned char optbuf[sizeof(struct ip_options) + 40];
struct ip_options *opt = (struct ip_options *)optbuf;
struct iphdr *iph, _iph;
- unsigned int start;
bool found = false;
__be32 info;
int optlen;
@@ -93,7 +92,6 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb,
iph = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
if (!iph)
return -EBADMSG;
- start = sizeof(struct iphdr);
optlen = iph->ihl * 4 - (int)sizeof(struct iphdr);
if (optlen <= 0)
@@ -103,7 +101,7 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb,
/* Copy the options since __ip_options_compile() modifies
* the options.
*/
- if (skb_copy_bits(skb, start, opt->__data, optlen))
+ if (skb_copy_bits(skb, sizeof(struct iphdr), opt->__data, optlen))
return -EBADMSG;
opt->optlen = optlen;
@@ -118,18 +116,18 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb,
found = target == IPOPT_SSRR ? opt->is_strictroute :
!opt->is_strictroute;
if (found)
- *offset = opt->srr + start;
+ *offset = opt->srr;
break;
case IPOPT_RR:
if (!opt->rr)
break;
- *offset = opt->rr + start;
+ *offset = opt->rr;
found = true;
break;
case IPOPT_RA:
if (!opt->router_alert)
break;
- *offset = opt->router_alert + start;
+ *offset = opt->router_alert;
found = true;
break;
default:
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 3bb4810234aa..e573e9221302 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1368,8 +1368,11 @@ bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr)
attr == OVS_KEY_ATTR_CT_MARK)
return true;
if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
- attr == OVS_KEY_ATTR_CT_LABELS)
- return true;
+ attr == OVS_KEY_ATTR_CT_LABELS) {
+ struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+
+ return ovs_net->xt_label;
+ }
return false;
}
@@ -1378,7 +1381,6 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa, bool log)
{
- unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
struct ovs_conntrack_info ct_info;
const char *helper = NULL;
u16 family;
@@ -1407,12 +1409,6 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
return -ENOMEM;
}
- if (nf_connlabels_get(net, n_bits - 1)) {
- nf_ct_tmpl_free(ct_info.ct);
- OVS_NLERR(log, "Failed to set connlabel length");
- return -EOPNOTSUPP;
- }
-
if (ct_info.timeout[0]) {
if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto,
ct_info.timeout))
@@ -1581,7 +1577,6 @@ static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
if (ct_info->ct) {
if (ct_info->timeout[0])
nf_ct_destroy_timeout(ct_info->ct);
- nf_connlabels_put(nf_ct_net(ct_info->ct));
nf_ct_tmpl_free(ct_info->ct);
}
}
@@ -2006,9 +2001,17 @@ struct genl_family dp_ct_limit_genl_family __ro_after_init = {
int ovs_ct_init(struct net *net)
{
-#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
+ unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+ if (nf_connlabels_get(net, n_bits - 1)) {
+ ovs_net->xt_label = false;
+ OVS_NLERR(true, "Failed to set connlabel length");
+ } else {
+ ovs_net->xt_label = true;
+ }
+
+#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
return ovs_ct_limit_init(net, ovs_net);
#else
return 0;
@@ -2017,9 +2020,12 @@ int ovs_ct_init(struct net *net)
void ovs_ct_exit(struct net *net)
{
-#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
ovs_ct_limit_exit(net, ovs_net);
#endif
+
+ if (ovs_net->xt_label)
+ nf_connlabels_put(net);
}
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 365b9bb7f546..9ca6231ea647 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -160,6 +160,9 @@ struct ovs_net {
#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
struct ovs_ct_limit_info *ct_limit_info;
#endif
+
+ /* Module reference for configuring conntrack. */
+ bool xt_label;
};
/**
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 881ddd3696d5..95e0dd14dc1a 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -2317,14 +2317,10 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb)
OVS_FLOW_ATTR_MASK, true, skb);
}
-#define MAX_ACTIONS_BUFSIZE (32 * 1024)
-
static struct sw_flow_actions *nla_alloc_flow_actions(int size)
{
struct sw_flow_actions *sfa;
- WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE);
-
sfa = kmalloc(kmalloc_size_roundup(sizeof(*sfa) + size), GFP_KERNEL);
if (!sfa)
return ERR_PTR(-ENOMEM);
@@ -2480,15 +2476,6 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2);
- if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
- if ((next_offset + req_size) > MAX_ACTIONS_BUFSIZE) {
- OVS_NLERR(log, "Flow action size exceeds max %u",
- MAX_ACTIONS_BUFSIZE);
- return ERR_PTR(-EMSGSIZE);
- }
- new_acts_size = MAX_ACTIONS_BUFSIZE;
- }
-
acts = nla_alloc_flow_actions(new_acts_size);
if (IS_ERR(acts))
return ERR_CAST(acts);
@@ -3545,7 +3532,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
int err;
u32 mpls_label_count = 0;
- *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE));
+ *sfa = nla_alloc_flow_actions(nla_len(attr));
if (IS_ERR(*sfa))
return PTR_ERR(*sfa);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index e3e91cf867eb..6c625dcd0651 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -2254,6 +2254,12 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
return -EOPNOTSUPP;
}
+ /* Prevent creation of traffic classes with classid TC_H_ROOT */
+ if (clid == TC_H_ROOT) {
+ NL_SET_ERR_MSG(extack, "Cannot create traffic class with classid TC_H_ROOT");
+ return -EINVAL;
+ }
+
new_cl = cl;
err = -EOPNOTSUPP;
if (cops->change)
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index ab6234b4fcd5..532fde548b88 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -913,7 +913,8 @@ static void gred_destroy(struct Qdisc *sch)
for (i = 0; i < table->DPs; i++)
gred_destroy_vq(table->tab[i]);
- gred_offload(sch, TC_GRED_DESTROY);
+ if (table->opt)
+ gred_offload(sch, TC_GRED_DESTROY);
kfree(table->opt);
}
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 6488ead9e464..4d5fbacef496 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -472,7 +472,7 @@ bool switchdev_port_obj_act_is_deferred(struct net_device *dev,
EXPORT_SYMBOL_GPL(switchdev_port_obj_act_is_deferred);
static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain);
-static BLOCKING_NOTIFIER_HEAD(switchdev_blocking_notif_chain);
+static RAW_NOTIFIER_HEAD(switchdev_blocking_notif_chain);
/**
* register_switchdev_notifier - Register notifier
@@ -518,17 +518,27 @@ EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
int register_switchdev_blocking_notifier(struct notifier_block *nb)
{
- struct blocking_notifier_head *chain = &switchdev_blocking_notif_chain;
+ struct raw_notifier_head *chain = &switchdev_blocking_notif_chain;
+ int err;
+
+ rtnl_lock();
+ err = raw_notifier_chain_register(chain, nb);
+ rtnl_unlock();
- return blocking_notifier_chain_register(chain, nb);
+ return err;
}
EXPORT_SYMBOL_GPL(register_switchdev_blocking_notifier);
int unregister_switchdev_blocking_notifier(struct notifier_block *nb)
{
- struct blocking_notifier_head *chain = &switchdev_blocking_notif_chain;
+ struct raw_notifier_head *chain = &switchdev_blocking_notif_chain;
+ int err;
- return blocking_notifier_chain_unregister(chain, nb);
+ rtnl_lock();
+ err = raw_notifier_chain_unregister(chain, nb);
+ rtnl_unlock();
+
+ return err;
}
EXPORT_SYMBOL_GPL(unregister_switchdev_blocking_notifier);
@@ -536,10 +546,11 @@ int call_switchdev_blocking_notifiers(unsigned long val, struct net_device *dev,
struct switchdev_notifier_info *info,
struct netlink_ext_ack *extack)
{
+ ASSERT_RTNL();
info->dev = dev;
info->extack = extack;
- return blocking_notifier_call_chain(&switchdev_blocking_notif_chain,
- val, info);
+ return raw_notifier_call_chain(&switchdev_blocking_notif_chain,
+ val, info);
}
EXPORT_SYMBOL_GPL(call_switchdev_blocking_notifiers);
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 12b780de8779..828e29872633 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1191,6 +1191,13 @@ void cfg80211_dev_free(struct cfg80211_registered_device *rdev)
{
struct cfg80211_internal_bss *scan, *tmp;
struct cfg80211_beacon_registration *reg, *treg;
+ unsigned long flags;
+
+ spin_lock_irqsave(&rdev->wiphy_work_lock, flags);
+ WARN_ON(!list_empty(&rdev->wiphy_work_list));
+ spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags);
+ cancel_work_sync(&rdev->wiphy_work);
+
rfkill_destroy(rdev->wiphy.rfkill);
list_for_each_entry_safe(reg, treg, &rdev->beacon_registrations, list) {
list_del(&reg->list);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index e87267fbb442..aac0e7298dc7 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -11123,6 +11123,7 @@ static struct cfg80211_bss *nl80211_assoc_bss(struct cfg80211_registered_device
static int nl80211_process_links(struct cfg80211_registered_device *rdev,
struct cfg80211_assoc_link *links,
+ int assoc_link_id,
const u8 *ssid, int ssid_len,
struct genl_info *info)
{
@@ -11153,7 +11154,7 @@ static int nl80211_process_links(struct cfg80211_registered_device *rdev,
}
links[link_id].bss =
nl80211_assoc_bss(rdev, ssid, ssid_len, attrs,
- link_id, link_id);
+ assoc_link_id, link_id);
if (IS_ERR(links[link_id].bss)) {
err = PTR_ERR(links[link_id].bss);
links[link_id].bss = NULL;
@@ -11350,8 +11351,8 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
req.ap_mld_addr = nla_data(info->attrs[NL80211_ATTR_MLD_ADDR]);
ap_addr = req.ap_mld_addr;
- err = nl80211_process_links(rdev, req.links, ssid, ssid_len,
- info);
+ err = nl80211_process_links(rdev, req.links, req.link_id,
+ ssid, ssid_len, info);
if (err)
goto free;
@@ -16506,7 +16507,10 @@ static int nl80211_assoc_ml_reconf(struct sk_buff *skb, struct genl_info *info)
add_links = 0;
if (info->attrs[NL80211_ATTR_MLO_LINKS]) {
- err = nl80211_process_links(rdev, links, NULL, 0, info);
+ err = nl80211_process_links(rdev, links,
+ /* mark as MLO, but not assoc */
+ IEEE80211_MLD_MAX_NUM_LINKS,
+ NULL, 0, info);
if (err)
return err;
diff --git a/rust/kernel/alloc/allocator_test.rs b/rust/kernel/alloc/allocator_test.rs
index e3240d16040b..c37d4c0c64e9 100644
--- a/rust/kernel/alloc/allocator_test.rs
+++ b/rust/kernel/alloc/allocator_test.rs
@@ -62,6 +62,24 @@ unsafe impl Allocator for Cmalloc {
));
}
+ // ISO C (ISO/IEC 9899:2011) defines `aligned_alloc`:
+ //
+ // > The value of alignment shall be a valid alignment supported by the implementation
+ // [...].
+ //
+ // As an example of the "supported by the implementation" requirement, POSIX.1-2001 (IEEE
+ // 1003.1-2001) defines `posix_memalign`:
+ //
+ // > The value of alignment shall be a power of two multiple of sizeof (void *).
+ //
+ // and POSIX-based implementations of `aligned_alloc` inherit this requirement. At the time
+ // of writing, this is known to be the case on macOS (but not in glibc).
+ //
+ // Satisfy the stricter requirement to avoid spurious test failures on some platforms.
+ let min_align = core::mem::size_of::<*const crate::ffi::c_void>();
+ let layout = layout.align_to(min_align).map_err(|_| AllocError)?;
+ let layout = layout.pad_to_align();
+
// SAFETY: Returns either NULL or a pointer to a memory allocation that satisfies or
// exceeds the given size and alignment requirements.
let dst = unsafe { libc_aligned_alloc(layout.align(), layout.size()) } as *mut u8;
diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs
index f6ecf09cb65f..a194d83e6835 100644
--- a/rust/kernel/error.rs
+++ b/rust/kernel/error.rs
@@ -107,7 +107,7 @@ impl Error {
} else {
// TODO: Make it a `WARN_ONCE` once available.
crate::pr_warn!(
- "attempted to create `Error` with out of range `errno`: {}",
+ "attempted to create `Error` with out of range `errno`: {}\n",
errno
);
code::EINVAL
diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs
index 7fd1ea8265a5..e25d047f3c82 100644
--- a/rust/kernel/init.rs
+++ b/rust/kernel/init.rs
@@ -259,7 +259,7 @@ pub mod macros;
/// },
/// }));
/// let foo: Pin<&mut Foo> = foo;
-/// pr_info!("a: {}", &*foo.a.lock());
+/// pr_info!("a: {}\n", &*foo.a.lock());
/// ```
///
/// # Syntax
@@ -319,7 +319,7 @@ macro_rules! stack_pin_init {
/// }, GFP_KERNEL)?,
/// }));
/// let foo = foo.unwrap();
-/// pr_info!("a: {}", &*foo.a.lock());
+/// pr_info!("a: {}\n", &*foo.a.lock());
/// ```
///
/// ```rust,ignore
@@ -352,7 +352,7 @@ macro_rules! stack_pin_init {
/// x: 64,
/// }, GFP_KERNEL)?,
/// }));
-/// pr_info!("a: {}", &*foo.a.lock());
+/// pr_info!("a: {}\n", &*foo.a.lock());
/// # Ok::<_, AllocError>(())
/// ```
///
@@ -882,7 +882,7 @@ pub unsafe trait PinInit<T: ?Sized, E = Infallible>: Sized {
///
/// impl Foo {
/// fn setup(self: Pin<&mut Self>) {
- /// pr_info!("Setting up foo");
+ /// pr_info!("Setting up foo\n");
/// }
/// }
///
@@ -986,7 +986,7 @@ pub unsafe trait Init<T: ?Sized, E = Infallible>: PinInit<T, E> {
///
/// impl Foo {
/// fn setup(&mut self) {
- /// pr_info!("Setting up foo");
+ /// pr_info!("Setting up foo\n");
/// }
/// }
///
@@ -1336,7 +1336,7 @@ impl<T> InPlaceWrite<T> for UniqueArc<MaybeUninit<T>> {
/// #[pinned_drop]
/// impl PinnedDrop for Foo {
/// fn drop(self: Pin<&mut Self>) {
-/// pr_info!("Foo is being dropped!");
+/// pr_info!("Foo is being dropped!\n");
/// }
/// }
/// ```
@@ -1418,17 +1418,14 @@ impl_zeroable! {
// SAFETY: `T: Zeroable` and `UnsafeCell` is `repr(transparent)`.
{<T: ?Sized + Zeroable>} UnsafeCell<T>,
- // SAFETY: All zeros is equivalent to `None` (option layout optimization guarantee).
+ // SAFETY: All zeros is equivalent to `None` (option layout optimization guarantee:
+ // https://doc.rust-lang.org/stable/std/option/index.html#representation).
Option<NonZeroU8>, Option<NonZeroU16>, Option<NonZeroU32>, Option<NonZeroU64>,
Option<NonZeroU128>, Option<NonZeroUsize>,
Option<NonZeroI8>, Option<NonZeroI16>, Option<NonZeroI32>, Option<NonZeroI64>,
Option<NonZeroI128>, Option<NonZeroIsize>,
-
- // SAFETY: All zeros is equivalent to `None` (option layout optimization guarantee).
- //
- // In this case we are allowed to use `T: ?Sized`, since all zeros is the `None` variant.
- {<T: ?Sized>} Option<NonNull<T>>,
- {<T: ?Sized>} Option<KBox<T>>,
+ {<T>} Option<NonNull<T>>,
+ {<T>} Option<KBox<T>>,
// SAFETY: `null` pointer is valid.
//
diff --git a/rust/kernel/init/macros.rs b/rust/kernel/init/macros.rs
index 1fd146a83241..b7213962a6a5 100644
--- a/rust/kernel/init/macros.rs
+++ b/rust/kernel/init/macros.rs
@@ -45,7 +45,7 @@
//! #[pinned_drop]
//! impl PinnedDrop for Foo {
//! fn drop(self: Pin<&mut Self>) {
-//! pr_info!("{self:p} is getting dropped.");
+//! pr_info!("{self:p} is getting dropped.\n");
//! }
//! }
//!
@@ -412,7 +412,7 @@
//! #[pinned_drop]
//! impl PinnedDrop for Foo {
//! fn drop(self: Pin<&mut Self>) {
-//! pr_info!("{self:p} is getting dropped.");
+//! pr_info!("{self:p} is getting dropped.\n");
//! }
//! }
//! ```
@@ -423,7 +423,7 @@
//! // `unsafe`, full path and the token parameter are added, everything else stays the same.
//! unsafe impl ::kernel::init::PinnedDrop for Foo {
//! fn drop(self: Pin<&mut Self>, _: ::kernel::init::__internal::OnlyCallFromDrop) {
-//! pr_info!("{self:p} is getting dropped.");
+//! pr_info!("{self:p} is getting dropped.\n");
//! }
//! }
//! ```
diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs
index 398242f92a96..7697c60b2d1a 100644
--- a/rust/kernel/lib.rs
+++ b/rust/kernel/lib.rs
@@ -6,7 +6,7 @@
//! usage by Rust code in the kernel and is shared by all of them.
//!
//! In other words, all the rest of the Rust code in the kernel (e.g. kernel
-//! modules written in Rust) depends on [`core`], [`alloc`] and this crate.
+//! modules written in Rust) depends on [`core`] and this crate.
//!
//! If you need a kernel C API that is not ported or wrapped yet here, then
//! do so first instead of bypassing this crate.
diff --git a/rust/kernel/sync.rs b/rust/kernel/sync.rs
index 3498fb344dc9..16eab9138b2b 100644
--- a/rust/kernel/sync.rs
+++ b/rust/kernel/sync.rs
@@ -30,28 +30,20 @@ pub struct LockClassKey(Opaque<bindings::lock_class_key>);
unsafe impl Sync for LockClassKey {}
impl LockClassKey {
- /// Creates a new lock class key.
- pub const fn new() -> Self {
- Self(Opaque::uninit())
- }
-
pub(crate) fn as_ptr(&self) -> *mut bindings::lock_class_key {
self.0.get()
}
}
-impl Default for LockClassKey {
- fn default() -> Self {
- Self::new()
- }
-}
-
/// Defines a new static lock class and returns a pointer to it.
#[doc(hidden)]
#[macro_export]
macro_rules! static_lock_class {
() => {{
- static CLASS: $crate::sync::LockClassKey = $crate::sync::LockClassKey::new();
+ static CLASS: $crate::sync::LockClassKey =
+ // SAFETY: lockdep expects uninitialized memory when it's handed a statically allocated
+ // lock_class_key
+ unsafe { ::core::mem::MaybeUninit::uninit().assume_init() };
&CLASS
}};
}
diff --git a/rust/kernel/sync/locked_by.rs b/rust/kernel/sync/locked_by.rs
index a7b244675c2b..61f100a45b35 100644
--- a/rust/kernel/sync/locked_by.rs
+++ b/rust/kernel/sync/locked_by.rs
@@ -55,7 +55,7 @@ use core::{cell::UnsafeCell, mem::size_of, ptr};
/// fn print_bytes_used(dir: &Directory, file: &File) {
/// let guard = dir.inner.lock();
/// let inner_file = file.inner.access(&guard);
-/// pr_info!("{} {}", guard.bytes_used, inner_file.bytes_used);
+/// pr_info!("{} {}\n", guard.bytes_used, inner_file.bytes_used);
/// }
///
/// /// Increments `bytes_used` for both the directory and file.
diff --git a/rust/kernel/task.rs b/rust/kernel/task.rs
index 07bc22a7645c..38da555a2bdb 100644
--- a/rust/kernel/task.rs
+++ b/rust/kernel/task.rs
@@ -320,7 +320,7 @@ impl Task {
/// Wakes up the task.
pub fn wake_up(&self) {
- // SAFETY: It's always safe to call `signal_pending` on a valid task, even if the task
+ // SAFETY: It's always safe to call `wake_up_process` on a valid task, even if the task
// running.
unsafe { bindings::wake_up_process(self.as_ptr()) };
}
diff --git a/rust/kernel/workqueue.rs b/rust/kernel/workqueue.rs
index 0cd100d2aefb..b7be224cdf4b 100644
--- a/rust/kernel/workqueue.rs
+++ b/rust/kernel/workqueue.rs
@@ -60,7 +60,7 @@
//! type Pointer = Arc<MyStruct>;
//!
//! fn run(this: Arc<MyStruct>) {
-//! pr_info!("The value is: {}", this.value);
+//! pr_info!("The value is: {}\n", this.value);
//! }
//! }
//!
@@ -108,7 +108,7 @@
//! type Pointer = Arc<MyStruct>;
//!
//! fn run(this: Arc<MyStruct>) {
-//! pr_info!("The value is: {}", this.value_1);
+//! pr_info!("The value is: {}\n", this.value_1);
//! }
//! }
//!
@@ -116,7 +116,7 @@
//! type Pointer = Arc<MyStruct>;
//!
//! fn run(this: Arc<MyStruct>) {
-//! pr_info!("The second value is: {}", this.value_2);
+//! pr_info!("The second value is: {}\n", this.value_2);
//! }
//! }
//!
diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py
index aa8ea1a4dbe5..adae71544cbd 100755
--- a/scripts/generate_rust_analyzer.py
+++ b/scripts/generate_rust_analyzer.py
@@ -57,14 +57,26 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs):
crates_indexes[display_name] = len(crates)
crates.append(crate)
- # First, the ones in `rust/` since they are a bit special.
- append_crate(
- "core",
- sysroot_src / "core" / "src" / "lib.rs",
- [],
- cfg=crates_cfgs.get("core", []),
- is_workspace_member=False,
- )
+ def append_sysroot_crate(
+ display_name,
+ deps,
+ cfg=[],
+ ):
+ append_crate(
+ display_name,
+ sysroot_src / display_name / "src" / "lib.rs",
+ deps,
+ cfg,
+ is_workspace_member=False,
+ )
+
+ # NB: sysroot crates reexport items from one another so setting up our transitive dependencies
+ # here is important for ensuring that rust-analyzer can resolve symbols. The sources of truth
+ # for this dependency graph are `(sysroot_src / crate / "Cargo.toml" for crate in crates)`.
+ append_sysroot_crate("core", [], cfg=crates_cfgs.get("core", []))
+ append_sysroot_crate("alloc", ["core"])
+ append_sysroot_crate("std", ["alloc", "core"])
+ append_sysroot_crate("proc_macro", ["core", "std"])
append_crate(
"compiler_builtins",
@@ -75,7 +87,7 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs):
append_crate(
"macros",
srctree / "rust" / "macros" / "lib.rs",
- [],
+ ["std", "proc_macro"],
is_proc_macro=True,
)
@@ -85,27 +97,28 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs):
["core", "compiler_builtins"],
)
- append_crate(
- "bindings",
- srctree / "rust"/ "bindings" / "lib.rs",
- ["core"],
- cfg=cfg,
- )
- crates[-1]["env"]["OBJTREE"] = str(objtree.resolve(True))
+ def append_crate_with_generated(
+ display_name,
+ deps,
+ ):
+ append_crate(
+ display_name,
+ srctree / "rust"/ display_name / "lib.rs",
+ deps,
+ cfg=cfg,
+ )
+ crates[-1]["env"]["OBJTREE"] = str(objtree.resolve(True))
+ crates[-1]["source"] = {
+ "include_dirs": [
+ str(srctree / "rust" / display_name),
+ str(objtree / "rust")
+ ],
+ "exclude_dirs": [],
+ }
- append_crate(
- "kernel",
- srctree / "rust" / "kernel" / "lib.rs",
- ["core", "macros", "build_error", "bindings"],
- cfg=cfg,
- )
- crates[-1]["source"] = {
- "include_dirs": [
- str(srctree / "rust" / "kernel"),
- str(objtree / "rust")
- ],
- "exclude_dirs": [],
- }
+ append_crate_with_generated("bindings", ["core"])
+ append_crate_with_generated("uapi", ["core"])
+ append_crate_with_generated("kernel", ["core", "macros", "build_error", "bindings", "uapi"])
def is_root_crate(build_file, target):
try:
diff --git a/scripts/rustdoc_test_gen.rs b/scripts/rustdoc_test_gen.rs
index 5ebd42ae4a3f..76aaa8329413 100644
--- a/scripts/rustdoc_test_gen.rs
+++ b/scripts/rustdoc_test_gen.rs
@@ -15,8 +15,8 @@
//! - Test code should be able to define functions and call them, without having to carry
//! the context.
//!
-//! - Later on, we may want to be able to test non-kernel code (e.g. `core`, `alloc` or
-//! third-party crates) which likely use the standard library `assert*!` macros.
+//! - Later on, we may want to be able to test non-kernel code (e.g. `core` or third-party
+//! crates) which likely use the standard library `assert*!` macros.
//!
//! For this reason, instead of the passed context, `kunit_get_current_test()` is used instead
//! (i.e. `current->kunit_test`).
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index d2a1f836dbbf..a84857a3c2bf 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -4790,6 +4790,21 @@ static void alc236_fixup_hp_coef_micmute_led(struct hda_codec *codec,
}
}
+static void alc295_fixup_hp_mute_led_coefbit11(struct hda_codec *codec,
+ const struct hda_fixup *fix, int action)
+{
+ struct alc_spec *spec = codec->spec;
+
+ if (action == HDA_FIXUP_ACT_PRE_PROBE) {
+ spec->mute_led_polarity = 0;
+ spec->mute_led_coef.idx = 0xb;
+ spec->mute_led_coef.mask = 3 << 3;
+ spec->mute_led_coef.on = 1 << 3;
+ spec->mute_led_coef.off = 1 << 4;
+ snd_hda_gen_add_mute_led_cdev(codec, coef_mute_led_set);
+ }
+}
+
static void alc285_fixup_hp_mute_led(struct hda_codec *codec,
const struct hda_fixup *fix, int action)
{
@@ -7656,6 +7671,7 @@ enum {
ALC290_FIXUP_MONO_SPEAKERS_HSJACK,
ALC290_FIXUP_SUBWOOFER,
ALC290_FIXUP_SUBWOOFER_HSJACK,
+ ALC295_FIXUP_HP_MUTE_LED_COEFBIT11,
ALC269_FIXUP_THINKPAD_ACPI,
ALC269_FIXUP_LENOVO_XPAD_ACPI,
ALC269_FIXUP_DMIC_THINKPAD_ACPI,
@@ -9401,6 +9417,10 @@ static const struct hda_fixup alc269_fixups[] = {
.chained = true,
.chain_id = ALC283_FIXUP_INT_MIC,
},
+ [ALC295_FIXUP_HP_MUTE_LED_COEFBIT11] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc295_fixup_hp_mute_led_coefbit11,
+ },
[ALC298_FIXUP_SAMSUNG_AMP] = {
.type = HDA_FIXUP_FUNC,
.v.func = alc298_fixup_samsung_amp,
@@ -10451,6 +10471,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3),
SND_PCI_QUIRK(0x103c, 0x8519, "HP Spectre x360 15-df0xxx", ALC285_FIXUP_HP_SPECTRE_X360),
SND_PCI_QUIRK(0x103c, 0x8537, "HP ProBook 440 G6", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
+ SND_PCI_QUIRK(0x103c, 0x85c6, "HP Pavilion x360 Convertible 14-dy1xxx", ALC295_FIXUP_HP_MUTE_LED_COEFBIT11),
SND_PCI_QUIRK(0x103c, 0x85de, "HP Envy x360 13-ar0xxx", ALC285_FIXUP_HP_ENVY_X360),
SND_PCI_QUIRK(0x103c, 0x860f, "HP ZBook 15 G6", ALC285_FIXUP_HP_GPIO_AMP_INIT),
SND_PCI_QUIRK(0x103c, 0x861f, "HP Elite Dragonfly G1", ALC285_FIXUP_HP_GPIO_AMP_INIT),
diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c
index b16587d8f97a..a7637056972a 100644
--- a/sound/soc/amd/yc/acp6x-mach.c
+++ b/sound/soc/amd/yc/acp6x-mach.c
@@ -252,6 +252,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
.driver_data = &acp6x_card,
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "21M6"),
+ }
+ },
+ {
+ .driver_data = &acp6x_card,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
DMI_MATCH(DMI_PRODUCT_NAME, "21ME"),
}
},
diff --git a/sound/soc/codecs/cs42l43-jack.c b/sound/soc/codecs/cs42l43-jack.c
index d9ab003e166b..ac19a572fe70 100644
--- a/sound/soc/codecs/cs42l43-jack.c
+++ b/sound/soc/codecs/cs42l43-jack.c
@@ -167,7 +167,7 @@ int cs42l43_set_jack(struct snd_soc_component *component,
autocontrol |= 0x3 << CS42L43_JACKDET_MODE_SHIFT;
ret = cs42l43_find_index(priv, "cirrus,tip-fall-db-ms", 500,
- NULL, cs42l43_accdet_db_ms,
+ &priv->tip_fall_db_ms, cs42l43_accdet_db_ms,
ARRAY_SIZE(cs42l43_accdet_db_ms));
if (ret < 0)
goto error;
@@ -175,7 +175,7 @@ int cs42l43_set_jack(struct snd_soc_component *component,
tip_deb |= ret << CS42L43_TIPSENSE_FALLING_DB_TIME_SHIFT;
ret = cs42l43_find_index(priv, "cirrus,tip-rise-db-ms", 500,
- NULL, cs42l43_accdet_db_ms,
+ &priv->tip_rise_db_ms, cs42l43_accdet_db_ms,
ARRAY_SIZE(cs42l43_accdet_db_ms));
if (ret < 0)
goto error;
@@ -764,6 +764,8 @@ void cs42l43_tip_sense_work(struct work_struct *work)
error:
mutex_unlock(&priv->jack_lock);
+ priv->suspend_jack_debounce = false;
+
pm_runtime_mark_last_busy(priv->dev);
pm_runtime_put_autosuspend(priv->dev);
}
@@ -771,14 +773,19 @@ error:
irqreturn_t cs42l43_tip_sense(int irq, void *data)
{
struct cs42l43_codec *priv = data;
+ unsigned int db_delay = priv->tip_debounce_ms;
cancel_delayed_work(&priv->bias_sense_timeout);
cancel_delayed_work(&priv->tip_sense_work);
cancel_delayed_work(&priv->button_press_work);
cancel_work(&priv->button_release_work);
+ // Ensure delay after suspend is long enough to avoid false detection
+ if (priv->suspend_jack_debounce)
+ db_delay += priv->tip_fall_db_ms + priv->tip_rise_db_ms;
+
queue_delayed_work(system_long_wq, &priv->tip_sense_work,
- msecs_to_jiffies(priv->tip_debounce_ms));
+ msecs_to_jiffies(db_delay));
return IRQ_HANDLED;
}
diff --git a/sound/soc/codecs/cs42l43.c b/sound/soc/codecs/cs42l43.c
index d2a2daefc2ec..ea84ac64c775 100644
--- a/sound/soc/codecs/cs42l43.c
+++ b/sound/soc/codecs/cs42l43.c
@@ -1146,7 +1146,7 @@ static const struct snd_kcontrol_new cs42l43_controls[] = {
SOC_DOUBLE_R_SX_TLV("ADC Volume", CS42L43_ADC_B_CTRL1, CS42L43_ADC_B_CTRL2,
CS42L43_ADC_PGA_GAIN_SHIFT,
- 0xF, 5, cs42l43_adc_tlv),
+ 0xF, 4, cs42l43_adc_tlv),
SOC_DOUBLE("PDM1 Invert Switch", CS42L43_DMIC_PDM_CTRL,
CS42L43_PDM1L_INV_SHIFT, CS42L43_PDM1R_INV_SHIFT, 1, 0),
@@ -2402,9 +2402,22 @@ static int cs42l43_codec_runtime_resume(struct device *dev)
return 0;
}
+static int cs42l43_codec_runtime_force_suspend(struct device *dev)
+{
+ struct cs42l43_codec *priv = dev_get_drvdata(dev);
+
+ dev_dbg(priv->dev, "Runtime suspend\n");
+
+ priv->suspend_jack_debounce = true;
+
+ pm_runtime_force_suspend(dev);
+
+ return 0;
+}
+
static const struct dev_pm_ops cs42l43_codec_pm_ops = {
RUNTIME_PM_OPS(NULL, cs42l43_codec_runtime_resume, NULL)
- SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume)
+ SYSTEM_SLEEP_PM_OPS(cs42l43_codec_runtime_force_suspend, pm_runtime_force_resume)
};
static const struct platform_device_id cs42l43_codec_id_table[] = {
diff --git a/sound/soc/codecs/cs42l43.h b/sound/soc/codecs/cs42l43.h
index 9c144e129535..1cd9d8a71c43 100644
--- a/sound/soc/codecs/cs42l43.h
+++ b/sound/soc/codecs/cs42l43.h
@@ -78,6 +78,8 @@ struct cs42l43_codec {
bool use_ring_sense;
unsigned int tip_debounce_ms;
+ unsigned int tip_fall_db_ms;
+ unsigned int tip_rise_db_ms;
unsigned int bias_low;
unsigned int bias_sense_ua;
unsigned int bias_ramp_ms;
@@ -95,6 +97,7 @@ struct cs42l43_codec {
bool button_detect_running;
bool jack_present;
int jack_override;
+ bool suspend_jack_debounce;
struct work_struct hp_ilimit_work;
struct delayed_work hp_ilimit_clear_work;
diff --git a/sound/soc/codecs/rt1320-sdw.c b/sound/soc/codecs/rt1320-sdw.c
index 3510c3819074..d83b236a0450 100644
--- a/sound/soc/codecs/rt1320-sdw.c
+++ b/sound/soc/codecs/rt1320-sdw.c
@@ -535,6 +535,9 @@ static int rt1320_read_prop(struct sdw_slave *slave)
/* set the timeout values */
prop->clk_stop_timeout = 64;
+ /* BIOS may set wake_capable. Make sure it is 0 as wake events are disabled. */
+ prop->wake_capable = 0;
+
return 0;
}
diff --git a/sound/soc/codecs/rt722-sdca-sdw.c b/sound/soc/codecs/rt722-sdca-sdw.c
index 25fc13687bc8..4d3043627bd0 100644
--- a/sound/soc/codecs/rt722-sdca-sdw.c
+++ b/sound/soc/codecs/rt722-sdca-sdw.c
@@ -86,6 +86,10 @@ static bool rt722_sdca_mbq_readable_register(struct device *dev, unsigned int re
case 0x6100067:
case 0x6100070 ... 0x610007c:
case 0x6100080:
+ case SDW_SDCA_CTL(FUNC_NUM_MIC_ARRAY, RT722_SDCA_ENT_FU15, RT722_SDCA_CTL_FU_CH_GAIN,
+ CH_01) ...
+ SDW_SDCA_CTL(FUNC_NUM_MIC_ARRAY, RT722_SDCA_ENT_FU15, RT722_SDCA_CTL_FU_CH_GAIN,
+ CH_04):
case SDW_SDCA_CTL(FUNC_NUM_MIC_ARRAY, RT722_SDCA_ENT_USER_FU1E, RT722_SDCA_CTL_FU_VOLUME,
CH_01):
case SDW_SDCA_CTL(FUNC_NUM_MIC_ARRAY, RT722_SDCA_ENT_USER_FU1E, RT722_SDCA_CTL_FU_VOLUME,
diff --git a/sound/soc/codecs/wm0010.c b/sound/soc/codecs/wm0010.c
index edd2cb185c42..9e67fbfc2cca 100644
--- a/sound/soc/codecs/wm0010.c
+++ b/sound/soc/codecs/wm0010.c
@@ -920,7 +920,7 @@ static int wm0010_spi_probe(struct spi_device *spi)
if (ret) {
dev_err(wm0010->dev, "Failed to set IRQ %d as wake source: %d\n",
irq, ret);
- return ret;
+ goto free_irq;
}
if (spi->max_speed_hz)
@@ -932,9 +932,18 @@ static int wm0010_spi_probe(struct spi_device *spi)
&soc_component_dev_wm0010, wm0010_dai,
ARRAY_SIZE(wm0010_dai));
if (ret < 0)
- return ret;
+ goto disable_irq_wake;
return 0;
+
+disable_irq_wake:
+ irq_set_irq_wake(wm0010->irq, 0);
+
+free_irq:
+ if (wm0010->irq)
+ free_irq(wm0010->irq, wm0010);
+
+ return ret;
}
static void wm0010_spi_remove(struct spi_device *spi)
diff --git a/sound/soc/codecs/wsa884x.c b/sound/soc/codecs/wsa884x.c
index 86df5152c547..560a2c04b695 100644
--- a/sound/soc/codecs/wsa884x.c
+++ b/sound/soc/codecs/wsa884x.c
@@ -1875,7 +1875,7 @@ static int wsa884x_get_temp(struct wsa884x_priv *wsa884x, long *temp)
* Reading temperature is possible only when Power Amplifier is
* off. Report last cached data.
*/
- *temp = wsa884x->temperature;
+ *temp = wsa884x->temperature * 1000;
return 0;
}
@@ -1934,7 +1934,7 @@ static int wsa884x_get_temp(struct wsa884x_priv *wsa884x, long *temp)
if ((val > WSA884X_LOW_TEMP_THRESHOLD) &&
(val < WSA884X_HIGH_TEMP_THRESHOLD)) {
wsa884x->temperature = val;
- *temp = val;
+ *temp = val * 1000;
ret = 0;
} else {
ret = -EAGAIN;
diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c
index c13064c77726..90dafa810b2e 100644
--- a/sound/soc/intel/boards/sof_sdw.c
+++ b/sound/soc/intel/boards/sof_sdw.c
@@ -954,7 +954,7 @@ static int create_sdw_dailinks(struct snd_soc_card *card,
/* generate DAI links by each sdw link */
while (sof_dais->initialised) {
- int current_be_id;
+ int current_be_id = 0;
ret = create_sdw_dailink(card, sof_dais, dai_links,
&current_be_id, codec_conf);
diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c
index 19928f098d8d..b0e4e4168f38 100644
--- a/sound/soc/soc-ops.c
+++ b/sound/soc/soc-ops.c
@@ -337,7 +337,7 @@ int snd_soc_put_volsw(struct snd_kcontrol *kcontrol,
if (ucontrol->value.integer.value[0] < 0)
return -EINVAL;
val = ucontrol->value.integer.value[0];
- if (mc->platform_max && ((int)val + min) > mc->platform_max)
+ if (mc->platform_max && val > mc->platform_max)
return -EINVAL;
if (val > max - min)
return -EINVAL;
@@ -350,7 +350,7 @@ int snd_soc_put_volsw(struct snd_kcontrol *kcontrol,
if (ucontrol->value.integer.value[1] < 0)
return -EINVAL;
val2 = ucontrol->value.integer.value[1];
- if (mc->platform_max && ((int)val2 + min) > mc->platform_max)
+ if (mc->platform_max && val2 > mc->platform_max)
return -EINVAL;
if (val2 > max - min)
return -EINVAL;
@@ -503,17 +503,16 @@ int snd_soc_info_volsw_range(struct snd_kcontrol *kcontrol,
{
struct soc_mixer_control *mc =
(struct soc_mixer_control *)kcontrol->private_value;
- int platform_max;
- int min = mc->min;
+ int max;
- if (!mc->platform_max)
- mc->platform_max = mc->max;
- platform_max = mc->platform_max;
+ max = mc->max - mc->min;
+ if (mc->platform_max && mc->platform_max < max)
+ max = mc->platform_max;
uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
uinfo->count = snd_soc_volsw_is_stereo(mc) ? 2 : 1;
uinfo->value.integer.min = 0;
- uinfo->value.integer.max = platform_max - min;
+ uinfo->value.integer.max = max;
return 0;
}
diff --git a/sound/soc/tegra/tegra210_adx.c b/sound/soc/tegra/tegra210_adx.c
index 3e6e8f51f380..0aa93b948378 100644
--- a/sound/soc/tegra/tegra210_adx.c
+++ b/sound/soc/tegra/tegra210_adx.c
@@ -264,7 +264,7 @@ static const struct snd_soc_dai_ops tegra210_adx_out_dai_ops = {
.rates = SNDRV_PCM_RATE_8000_192000, \
.formats = SNDRV_PCM_FMTBIT_S8 | \
SNDRV_PCM_FMTBIT_S16_LE | \
- SNDRV_PCM_FMTBIT_S16_LE | \
+ SNDRV_PCM_FMTBIT_S24_LE | \
SNDRV_PCM_FMTBIT_S32_LE, \
}, \
.capture = { \
@@ -274,7 +274,7 @@ static const struct snd_soc_dai_ops tegra210_adx_out_dai_ops = {
.rates = SNDRV_PCM_RATE_8000_192000, \
.formats = SNDRV_PCM_FMTBIT_S8 | \
SNDRV_PCM_FMTBIT_S16_LE | \
- SNDRV_PCM_FMTBIT_S16_LE | \
+ SNDRV_PCM_FMTBIT_S24_LE | \
SNDRV_PCM_FMTBIT_S32_LE, \
}, \
.ops = &tegra210_adx_out_dai_ops, \
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
index edc56e2cc606..7bc148889ca7 100755
--- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
@@ -11,8 +11,8 @@ ALL_TESTS="
lib_dir=$(dirname "$0")
source ${lib_dir}/bond_topo_3d1c.sh
-c_maddr="33:33:00:00:00:10"
-g_maddr="33:33:00:00:02:54"
+c_maddr="33:33:ff:00:00:10"
+g_maddr="33:33:ff:00:02:54"
skip_prio()
{
diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py
index eb83e7b48797..93f4b411b378 100755
--- a/tools/testing/selftests/drivers/net/ping.py
+++ b/tools/testing/selftests/drivers/net/ping.py
@@ -1,49 +1,219 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
+import os
+import random, string, time
from lib.py import ksft_run, ksft_exit
-from lib.py import ksft_eq
-from lib.py import NetDrvEpEnv
+from lib.py import ksft_eq, KsftSkipEx, KsftFailEx
+from lib.py import EthtoolFamily, NetDrvEpEnv
from lib.py import bkg, cmd, wait_port_listen, rand_port
+from lib.py import ethtool, ip
+remote_ifname=""
+no_sleep=False
-def test_v4(cfg) -> None:
+def _test_v4(cfg) -> None:
cfg.require_v4()
cmd(f"ping -c 1 -W0.5 {cfg.remote_v4}")
cmd(f"ping -c 1 -W0.5 {cfg.v4}", host=cfg.remote)
+ cmd(f"ping -s 65000 -c 1 -W0.5 {cfg.remote_v4}")
+ cmd(f"ping -s 65000 -c 1 -W0.5 {cfg.v4}", host=cfg.remote)
-
-def test_v6(cfg) -> None:
+def _test_v6(cfg) -> None:
cfg.require_v6()
- cmd(f"ping -c 1 -W0.5 {cfg.remote_v6}")
- cmd(f"ping -c 1 -W0.5 {cfg.v6}", host=cfg.remote)
-
+ cmd(f"ping -c 1 -W5 {cfg.remote_v6}")
+ cmd(f"ping -c 1 -W5 {cfg.v6}", host=cfg.remote)
+ cmd(f"ping -s 65000 -c 1 -W0.5 {cfg.remote_v6}")
+ cmd(f"ping -s 65000 -c 1 -W0.5 {cfg.v6}", host=cfg.remote)
-def test_tcp(cfg) -> None:
+def _test_tcp(cfg) -> None:
cfg.require_cmd("socat", remote=True)
port = rand_port()
listen_cmd = f"socat -{cfg.addr_ipver} -t 2 -u TCP-LISTEN:{port},reuseport STDOUT"
+ test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(65536))
with bkg(listen_cmd, exit_wait=True) as nc:
wait_port_listen(port)
- cmd(f"echo ping | socat -t 2 -u STDIN TCP:{cfg.baddr}:{port}",
+ cmd(f"echo {test_string} | socat -t 2 -u STDIN TCP:{cfg.baddr}:{port}",
shell=True, host=cfg.remote)
- ksft_eq(nc.stdout.strip(), "ping")
+ ksft_eq(nc.stdout.strip(), test_string)
+ test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(65536))
with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as nc:
wait_port_listen(port, host=cfg.remote)
- cmd(f"echo ping | socat -t 2 -u STDIN TCP:{cfg.remote_baddr}:{port}", shell=True)
- ksft_eq(nc.stdout.strip(), "ping")
-
+ cmd(f"echo {test_string} | socat -t 2 -u STDIN TCP:{cfg.remote_baddr}:{port}", shell=True)
+ ksft_eq(nc.stdout.strip(), test_string)
+
+def _set_offload_checksum(cfg, netnl, on) -> None:
+ try:
+ ethtool(f" -K {cfg.ifname} rx {on} tx {on} ")
+ except:
+ return
+
+def _set_xdp_generic_sb_on(cfg) -> None:
+ test_dir = os.path.dirname(os.path.realpath(__file__))
+ prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o"
+ cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+ cmd(f"ip link set dev {cfg.ifname} mtu 1500 xdpgeneric obj {prog} sec xdp", shell=True)
+
+ if no_sleep != True:
+ time.sleep(10)
+
+def _set_xdp_generic_mb_on(cfg) -> None:
+ test_dir = os.path.dirname(os.path.realpath(__file__))
+ prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o"
+ cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote)
+ ip("link set dev %s mtu 9000 xdpgeneric obj %s sec xdp.frags" % (cfg.ifname, prog))
+
+ if no_sleep != True:
+ time.sleep(10)
+
+def _set_xdp_native_sb_on(cfg) -> None:
+ test_dir = os.path.dirname(os.path.realpath(__file__))
+ prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o"
+ cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+ cmd(f"ip -j link set dev {cfg.ifname} mtu 1500 xdp obj {prog} sec xdp", shell=True)
+ xdp_info = ip("-d link show %s" % (cfg.ifname), json=True)[0]
+ if xdp_info['xdp']['mode'] != 1:
+ """
+ If the interface doesn't support native-mode, it falls back to generic mode.
+ The mode value 1 is native and 2 is generic.
+ So it raises an exception if mode is not 1(native mode).
+ """
+ raise KsftSkipEx('device does not support native-XDP')
+
+ if no_sleep != True:
+ time.sleep(10)
+
+def _set_xdp_native_mb_on(cfg) -> None:
+ test_dir = os.path.dirname(os.path.realpath(__file__))
+ prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o"
+ cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote)
+ try:
+ cmd(f"ip link set dev {cfg.ifname} mtu 9000 xdp obj {prog} sec xdp.frags", shell=True)
+ except Exception as e:
+ cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+ raise KsftSkipEx('device does not support native-multi-buffer XDP')
+
+ if no_sleep != True:
+ time.sleep(10)
+
+def _set_xdp_offload_on(cfg) -> None:
+ test_dir = os.path.dirname(os.path.realpath(__file__))
+ prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o"
+ cmd(f"ip link set dev {cfg.ifname} mtu 1500", shell=True)
+ try:
+ cmd(f"ip link set dev {cfg.ifname} xdpoffload obj {prog} sec xdp", shell=True)
+ except Exception as e:
+ raise KsftSkipEx('device does not support offloaded XDP')
+ cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+
+ if no_sleep != True:
+ time.sleep(10)
+
+def get_interface_info(cfg) -> None:
+ global remote_ifname
+ global no_sleep
+
+ remote_info = cmd(f"ip -4 -o addr show to {cfg.remote_v4} | awk '{{print $2}}'", shell=True, host=cfg.remote).stdout
+ remote_ifname = remote_info.rstrip('\n')
+ if remote_ifname == "":
+ raise KsftFailEx('Can not get remote interface')
+ local_info = ip("-d link show %s" % (cfg.ifname), json=True)[0]
+ if 'parentbus' in local_info and local_info['parentbus'] == "netdevsim":
+ no_sleep=True
+ if 'linkinfo' in local_info and local_info['linkinfo']['info_kind'] == "veth":
+ no_sleep=True
+
+def set_interface_init(cfg) -> None:
+ cmd(f"ip link set dev {cfg.ifname} mtu 1500", shell=True)
+ cmd(f"ip link set dev {cfg.ifname} xdp off ", shell=True)
+ cmd(f"ip link set dev {cfg.ifname} xdpgeneric off ", shell=True)
+ cmd(f"ip link set dev {cfg.ifname} xdpoffload off", shell=True)
+ cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+
+def test_default(cfg, netnl) -> None:
+ _set_offload_checksum(cfg, netnl, "off")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+ _set_offload_checksum(cfg, netnl, "on")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+
+def test_xdp_generic_sb(cfg, netnl) -> None:
+ _set_xdp_generic_sb_on(cfg)
+ _set_offload_checksum(cfg, netnl, "off")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+ _set_offload_checksum(cfg, netnl, "on")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+ ip("link set dev %s xdpgeneric off" % cfg.ifname)
+
+def test_xdp_generic_mb(cfg, netnl) -> None:
+ _set_xdp_generic_mb_on(cfg)
+ _set_offload_checksum(cfg, netnl, "off")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+ _set_offload_checksum(cfg, netnl, "on")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+ ip("link set dev %s xdpgeneric off" % cfg.ifname)
+
+def test_xdp_native_sb(cfg, netnl) -> None:
+ _set_xdp_native_sb_on(cfg)
+ _set_offload_checksum(cfg, netnl, "off")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+ _set_offload_checksum(cfg, netnl, "on")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+ ip("link set dev %s xdp off" % cfg.ifname)
+
+def test_xdp_native_mb(cfg, netnl) -> None:
+ _set_xdp_native_mb_on(cfg)
+ _set_offload_checksum(cfg, netnl, "off")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+ _set_offload_checksum(cfg, netnl, "on")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+ ip("link set dev %s xdp off" % cfg.ifname)
+
+def test_xdp_offload(cfg, netnl) -> None:
+ _set_xdp_offload_on(cfg)
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+ ip("link set dev %s xdpoffload off" % cfg.ifname)
def main() -> None:
with NetDrvEpEnv(__file__) as cfg:
- ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, ))
+ get_interface_info(cfg)
+ set_interface_init(cfg)
+ ksft_run([test_default,
+ test_xdp_generic_sb,
+ test_xdp_generic_mb,
+ test_xdp_native_sb,
+ test_xdp_native_mb,
+ test_xdp_offload],
+ args=(cfg, EthtoolFamily()))
+ set_interface_init(cfg)
ksft_exit()
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index 4277b983cace..f62b0a5aba35 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -50,8 +50,18 @@ LIBKVM_riscv += lib/riscv/ucall.c
# Non-compiled test targets
TEST_PROGS_x86 += x86/nx_huge_pages_test.sh
+# Compiled test targets valid on all architectures with libkvm support
+TEST_GEN_PROGS_COMMON = demand_paging_test
+TEST_GEN_PROGS_COMMON += dirty_log_test
+TEST_GEN_PROGS_COMMON += guest_print_test
+TEST_GEN_PROGS_COMMON += kvm_binary_stats_test
+TEST_GEN_PROGS_COMMON += kvm_create_max_vcpus
+TEST_GEN_PROGS_COMMON += kvm_page_table_test
+TEST_GEN_PROGS_COMMON += set_memory_region_test
+
# Compiled test targets
-TEST_GEN_PROGS_x86 = x86/cpuid_test
+TEST_GEN_PROGS_x86 = $(TEST_GEN_PROGS_COMMON)
+TEST_GEN_PROGS_x86 += x86/cpuid_test
TEST_GEN_PROGS_x86 += x86/cr4_cpuid_sync_test
TEST_GEN_PROGS_x86 += x86/dirty_log_page_splitting_test
TEST_GEN_PROGS_x86 += x86/feature_msrs_test
@@ -69,6 +79,7 @@ TEST_GEN_PROGS_x86 += x86/hyperv_tlb_flush
TEST_GEN_PROGS_x86 += x86/kvm_clock_test
TEST_GEN_PROGS_x86 += x86/kvm_pv_test
TEST_GEN_PROGS_x86 += x86/monitor_mwait_test
+TEST_GEN_PROGS_x86 += x86/nested_emulation_test
TEST_GEN_PROGS_x86 += x86/nested_exceptions_test
TEST_GEN_PROGS_x86 += x86/platform_info_test
TEST_GEN_PROGS_x86 += x86/pmu_counters_test
@@ -118,27 +129,21 @@ TEST_GEN_PROGS_x86 += x86/triple_fault_event_test
TEST_GEN_PROGS_x86 += x86/recalc_apic_map_test
TEST_GEN_PROGS_x86 += access_tracking_perf_test
TEST_GEN_PROGS_x86 += coalesced_io_test
-TEST_GEN_PROGS_x86 += demand_paging_test
-TEST_GEN_PROGS_x86 += dirty_log_test
TEST_GEN_PROGS_x86 += dirty_log_perf_test
TEST_GEN_PROGS_x86 += guest_memfd_test
-TEST_GEN_PROGS_x86 += guest_print_test
TEST_GEN_PROGS_x86 += hardware_disable_test
-TEST_GEN_PROGS_x86 += kvm_create_max_vcpus
-TEST_GEN_PROGS_x86 += kvm_page_table_test
TEST_GEN_PROGS_x86 += memslot_modification_stress_test
TEST_GEN_PROGS_x86 += memslot_perf_test
TEST_GEN_PROGS_x86 += mmu_stress_test
TEST_GEN_PROGS_x86 += rseq_test
-TEST_GEN_PROGS_x86 += set_memory_region_test
TEST_GEN_PROGS_x86 += steal_time
-TEST_GEN_PROGS_x86 += kvm_binary_stats_test
TEST_GEN_PROGS_x86 += system_counter_offset_test
TEST_GEN_PROGS_x86 += pre_fault_memory_test
# Compiled outputs used by test targets
TEST_GEN_PROGS_EXTENDED_x86 += x86/nx_huge_pages_test
+TEST_GEN_PROGS_arm64 = $(TEST_GEN_PROGS_COMMON)
TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs
TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases
TEST_GEN_PROGS_arm64 += arm64/debug-exceptions
@@ -157,22 +162,16 @@ TEST_GEN_PROGS_arm64 += arm64/no-vgic-v3
TEST_GEN_PROGS_arm64 += access_tracking_perf_test
TEST_GEN_PROGS_arm64 += arch_timer
TEST_GEN_PROGS_arm64 += coalesced_io_test
-TEST_GEN_PROGS_arm64 += demand_paging_test
-TEST_GEN_PROGS_arm64 += dirty_log_test
TEST_GEN_PROGS_arm64 += dirty_log_perf_test
-TEST_GEN_PROGS_arm64 += guest_print_test
TEST_GEN_PROGS_arm64 += get-reg-list
-TEST_GEN_PROGS_arm64 += kvm_create_max_vcpus
-TEST_GEN_PROGS_arm64 += kvm_page_table_test
TEST_GEN_PROGS_arm64 += memslot_modification_stress_test
TEST_GEN_PROGS_arm64 += memslot_perf_test
TEST_GEN_PROGS_arm64 += mmu_stress_test
TEST_GEN_PROGS_arm64 += rseq_test
-TEST_GEN_PROGS_arm64 += set_memory_region_test
TEST_GEN_PROGS_arm64 += steal_time
-TEST_GEN_PROGS_arm64 += kvm_binary_stats_test
-TEST_GEN_PROGS_s390 = s390/memop
+TEST_GEN_PROGS_s390 = $(TEST_GEN_PROGS_COMMON)
+TEST_GEN_PROGS_s390 += s390/memop
TEST_GEN_PROGS_s390 += s390/resets
TEST_GEN_PROGS_s390 += s390/sync_regs_test
TEST_GEN_PROGS_s390 += s390/tprot
@@ -181,27 +180,14 @@ TEST_GEN_PROGS_s390 += s390/debug_test
TEST_GEN_PROGS_s390 += s390/cpumodel_subfuncs_test
TEST_GEN_PROGS_s390 += s390/shared_zeropage_test
TEST_GEN_PROGS_s390 += s390/ucontrol_test
-TEST_GEN_PROGS_s390 += demand_paging_test
-TEST_GEN_PROGS_s390 += dirty_log_test
-TEST_GEN_PROGS_s390 += guest_print_test
-TEST_GEN_PROGS_s390 += kvm_create_max_vcpus
-TEST_GEN_PROGS_s390 += kvm_page_table_test
TEST_GEN_PROGS_s390 += rseq_test
-TEST_GEN_PROGS_s390 += set_memory_region_test
-TEST_GEN_PROGS_s390 += kvm_binary_stats_test
+TEST_GEN_PROGS_riscv = $(TEST_GEN_PROGS_COMMON)
TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test
TEST_GEN_PROGS_riscv += riscv/ebreak_test
TEST_GEN_PROGS_riscv += arch_timer
TEST_GEN_PROGS_riscv += coalesced_io_test
-TEST_GEN_PROGS_riscv += demand_paging_test
-TEST_GEN_PROGS_riscv += dirty_log_test
TEST_GEN_PROGS_riscv += get-reg-list
-TEST_GEN_PROGS_riscv += guest_print_test
-TEST_GEN_PROGS_riscv += kvm_binary_stats_test
-TEST_GEN_PROGS_riscv += kvm_create_max_vcpus
-TEST_GEN_PROGS_riscv += kvm_page_table_test
-TEST_GEN_PROGS_riscv += set_memory_region_test
TEST_GEN_PROGS_riscv += steal_time
SPLIT_TESTS += arch_timer
diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c
index 3c7defd34f56..447e619cf856 100644
--- a/tools/testing/selftests/kvm/access_tracking_perf_test.c
+++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c
@@ -239,7 +239,7 @@ static void vcpu_thread_main(struct memstress_vcpu_args *vcpu_args)
case ITERATION_MARK_IDLE:
mark_vcpu_memory_idle(vm, vcpu_args);
break;
- };
+ }
vcpu_last_completed_iteration[vcpu_idx] = current_iteration;
}
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index aacf80f57439..23593d9eeba9 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -31,15 +31,18 @@
/* Default guest test virtual memory offset */
#define DEFAULT_GUEST_TEST_MEM 0xc0000000
-/* How many pages to dirty for each guest loop */
-#define TEST_PAGES_PER_LOOP 1024
-
/* How many host loops to run (one KVM_GET_DIRTY_LOG for each loop) */
#define TEST_HOST_LOOP_N 32UL
/* Interval for each host loop (ms) */
#define TEST_HOST_LOOP_INTERVAL 10UL
+/*
+ * Ensure the vCPU is able to perform a reasonable number of writes in each
+ * iteration to provide a lower bound on coverage.
+ */
+#define TEST_MIN_WRITES_PER_ITERATION 0x100
+
/* Dirty bitmaps are always little endian, so we need to swap on big endian */
#if defined(__s390x__)
# define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7)
@@ -75,6 +78,8 @@ static uint64_t host_page_size;
static uint64_t guest_page_size;
static uint64_t guest_num_pages;
static uint64_t iteration;
+static uint64_t nr_writes;
+static bool vcpu_stop;
/*
* Guest physical memory offset of the testing memory slot.
@@ -96,7 +101,9 @@ static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
static void guest_code(void)
{
uint64_t addr;
- int i;
+
+#ifdef __s390x__
+ uint64_t i;
/*
* On s390x, all pages of a 1M segment are initially marked as dirty
@@ -107,16 +114,19 @@ static void guest_code(void)
for (i = 0; i < guest_num_pages; i++) {
addr = guest_test_virt_mem + i * guest_page_size;
vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration));
+ nr_writes++;
}
+#endif
while (true) {
- for (i = 0; i < TEST_PAGES_PER_LOOP; i++) {
+ while (!READ_ONCE(vcpu_stop)) {
addr = guest_test_virt_mem;
addr += (guest_random_u64(&guest_rng) % guest_num_pages)
* guest_page_size;
addr = align_down(addr, host_page_size);
vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration));
+ nr_writes++;
}
GUEST_SYNC(1);
@@ -133,25 +143,18 @@ static uint64_t host_num_pages;
/* For statistics only */
static uint64_t host_dirty_count;
static uint64_t host_clear_count;
-static uint64_t host_track_next_count;
/* Whether dirty ring reset is requested, or finished */
static sem_t sem_vcpu_stop;
static sem_t sem_vcpu_cont;
-/*
- * This is only set by main thread, and only cleared by vcpu thread. It is
- * used to request vcpu thread to stop at the next GUEST_SYNC, since GUEST_SYNC
- * is the only place that we'll guarantee both "dirty bit" and "dirty data"
- * will match. E.g., SIG_IPI won't guarantee that if the vcpu is interrupted
- * after setting dirty bit but before the data is written.
- */
-static atomic_t vcpu_sync_stop_requested;
+
/*
* This is updated by the vcpu thread to tell the host whether it's a
* ring-full event. It should only be read until a sem_wait() of
* sem_vcpu_stop and before vcpu continues to run.
*/
static bool dirty_ring_vcpu_ring_full;
+
/*
* This is only used for verifying the dirty pages. Dirty ring has a very
* tricky case when the ring just got full, kvm will do userspace exit due to
@@ -166,7 +169,51 @@ static bool dirty_ring_vcpu_ring_full;
* dirty gfn we've collected, so that if a mismatch of data found later in the
* verifying process, we let it pass.
*/
-static uint64_t dirty_ring_last_page;
+static uint64_t dirty_ring_last_page = -1ULL;
+
+/*
+ * In addition to the above, it is possible (especially if this
+ * test is run nested) for the above scenario to repeat multiple times:
+ *
+ * The following can happen:
+ *
+ * - L1 vCPU: Memory write is logged to PML but not committed.
+ *
+ * - L1 test thread: Ignores the write because its last dirty ring entry
+ * Resets the dirty ring which:
+ * - Resets the A/D bits in EPT
+ * - Issues tlb flush (invept), which is intercepted by L0
+ *
+ * - L0: frees the whole nested ept mmu root as the response to invept,
+ * and thus ensures that when memory write is retried, it will fault again
+ *
+ * - L1 vCPU: Same memory write is logged to the PML but not committed again.
+ *
+ * - L1 test thread: Ignores the write because its last dirty ring entry (again)
+ * Resets the dirty ring which:
+ * - Resets the A/D bits in EPT (again)
+ * - Issues tlb flush (again) which is intercepted by L0
+ *
+ * ...
+ *
+ * N times
+ *
+ * - L1 vCPU: Memory write is logged in the PML and then committed.
+ * Lots of other memory writes are logged and committed.
+ * ...
+ *
+ * - L1 test thread: Sees the memory write along with other memory writes
+ * in the dirty ring, and since the write is usually not
+ * the last entry in the dirty-ring and has a very outdated
+ * iteration, the test fails.
+ *
+ *
+ * Note that this is only possible when the write was the last log entry
+ * write during iteration N-1, thus remember last iteration last log entry
+ * and also don't fail when it is reported in the next iteration, together with
+ * an outdated iteration count.
+ */
+static uint64_t dirty_ring_prev_iteration_last_page;
enum log_mode_t {
/* Only use KVM_GET_DIRTY_LOG for logging */
@@ -191,24 +238,6 @@ static enum log_mode_t host_log_mode;
static pthread_t vcpu_thread;
static uint32_t test_dirty_ring_count = TEST_DIRTY_RING_COUNT;
-static void vcpu_kick(void)
-{
- pthread_kill(vcpu_thread, SIG_IPI);
-}
-
-/*
- * In our test we do signal tricks, let's use a better version of
- * sem_wait to avoid signal interrupts
- */
-static void sem_wait_until(sem_t *sem)
-{
- int ret;
-
- do
- ret = sem_wait(sem);
- while (ret == -1 && errno == EINTR);
-}
-
static bool clear_log_supported(void)
{
return kvm_has_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
@@ -243,21 +272,16 @@ static void clear_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
/* Should only be called after a GUEST_SYNC */
static void vcpu_handle_sync_stop(void)
{
- if (atomic_read(&vcpu_sync_stop_requested)) {
- /* It means main thread is sleeping waiting */
- atomic_set(&vcpu_sync_stop_requested, false);
+ if (READ_ONCE(vcpu_stop)) {
sem_post(&sem_vcpu_stop);
- sem_wait_until(&sem_vcpu_cont);
+ sem_wait(&sem_vcpu_cont);
}
}
-static void default_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
+static void default_after_vcpu_run(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
- TEST_ASSERT(ret == 0 || (ret == -1 && err == EINTR),
- "vcpu run failed: errno=%d", err);
-
TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
"Invalid guest sync status: exit_reason=%s",
exit_reason_str(run->exit_reason));
@@ -324,7 +348,6 @@ static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns,
"%u != %u", cur->slot, slot);
TEST_ASSERT(cur->offset < num_pages, "Offset overflow: "
"0x%llx >= 0x%x", cur->offset, num_pages);
- //pr_info("fetch 0x%x page %llu\n", *fetch_index, cur->offset);
__set_bit_le(cur->offset, bitmap);
dirty_ring_last_page = cur->offset;
dirty_gfn_set_collected(cur);
@@ -335,36 +358,11 @@ static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns,
return count;
}
-static void dirty_ring_wait_vcpu(void)
-{
- /* This makes sure that hardware PML cache flushed */
- vcpu_kick();
- sem_wait_until(&sem_vcpu_stop);
-}
-
-static void dirty_ring_continue_vcpu(void)
-{
- pr_info("Notifying vcpu to continue\n");
- sem_post(&sem_vcpu_cont);
-}
-
static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
void *bitmap, uint32_t num_pages,
uint32_t *ring_buf_idx)
{
- uint32_t count = 0, cleared;
- bool continued_vcpu = false;
-
- dirty_ring_wait_vcpu();
-
- if (!dirty_ring_vcpu_ring_full) {
- /*
- * This is not a ring-full event, it's safe to allow
- * vcpu to continue
- */
- dirty_ring_continue_vcpu();
- continued_vcpu = true;
- }
+ uint32_t count, cleared;
/* Only have one vcpu */
count = dirty_ring_collect_one(vcpu_map_dirty_ring(vcpu),
@@ -379,35 +377,18 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
*/
TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch "
"with collected (%u)", cleared, count);
-
- if (!continued_vcpu) {
- TEST_ASSERT(dirty_ring_vcpu_ring_full,
- "Didn't continue vcpu even without ring full");
- dirty_ring_continue_vcpu();
- }
-
- pr_info("Iteration %ld collected %u pages\n", iteration, count);
}
-static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
+static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
/* A ucall-sync or ring-full event is allowed */
if (get_ucall(vcpu, NULL) == UCALL_SYNC) {
- /* We should allow this to continue */
- ;
- } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL ||
- (ret == -1 && err == EINTR)) {
- /* Update the flag first before pause */
- WRITE_ONCE(dirty_ring_vcpu_ring_full,
- run->exit_reason == KVM_EXIT_DIRTY_RING_FULL);
- sem_post(&sem_vcpu_stop);
- pr_info("vcpu stops because %s...\n",
- dirty_ring_vcpu_ring_full ?
- "dirty ring is full" : "vcpu is kicked out");
- sem_wait_until(&sem_vcpu_cont);
- pr_info("vcpu continues now.\n");
+ vcpu_handle_sync_stop();
+ } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL) {
+ WRITE_ONCE(dirty_ring_vcpu_ring_full, true);
+ vcpu_handle_sync_stop();
} else {
TEST_ASSERT(false, "Invalid guest sync status: "
"exit_reason=%s",
@@ -426,7 +407,7 @@ struct log_mode {
void *bitmap, uint32_t num_pages,
uint32_t *ring_buf_idx);
/* Hook to call when after each vcpu run */
- void (*after_vcpu_run)(struct kvm_vcpu *vcpu, int ret, int err);
+ void (*after_vcpu_run)(struct kvm_vcpu *vcpu);
} log_modes[LOG_MODE_NUM] = {
{
.name = "dirty-log",
@@ -449,15 +430,6 @@ struct log_mode {
},
};
-/*
- * We use this bitmap to track some pages that should have its dirty
- * bit set in the _next_ iteration. For example, if we detected the
- * page value changed to current iteration but at the same time the
- * page bit is cleared in the latest bitmap, then the system must
- * report that write in the next get dirty log call.
- */
-static unsigned long *host_bmap_track;
-
static void log_modes_dump(void)
{
int i;
@@ -497,170 +469,109 @@ static void log_mode_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
mode->collect_dirty_pages(vcpu, slot, bitmap, num_pages, ring_buf_idx);
}
-static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
+static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu)
{
struct log_mode *mode = &log_modes[host_log_mode];
if (mode->after_vcpu_run)
- mode->after_vcpu_run(vcpu, ret, err);
+ mode->after_vcpu_run(vcpu);
}
static void *vcpu_worker(void *data)
{
- int ret;
struct kvm_vcpu *vcpu = data;
- uint64_t pages_count = 0;
- struct kvm_signal_mask *sigmask = alloca(offsetof(struct kvm_signal_mask, sigset)
- + sizeof(sigset_t));
- sigset_t *sigset = (sigset_t *) &sigmask->sigset;
- /*
- * SIG_IPI is unblocked atomically while in KVM_RUN. It causes the
- * ioctl to return with -EINTR, but it is still pending and we need
- * to accept it with the sigwait.
- */
- sigmask->len = 8;
- pthread_sigmask(0, NULL, sigset);
- sigdelset(sigset, SIG_IPI);
- vcpu_ioctl(vcpu, KVM_SET_SIGNAL_MASK, sigmask);
-
- sigemptyset(sigset);
- sigaddset(sigset, SIG_IPI);
+ sem_wait(&sem_vcpu_cont);
while (!READ_ONCE(host_quit)) {
- /* Clear any existing kick signals */
- pages_count += TEST_PAGES_PER_LOOP;
/* Let the guest dirty the random pages */
- ret = __vcpu_run(vcpu);
- if (ret == -1 && errno == EINTR) {
- int sig = -1;
- sigwait(sigset, &sig);
- assert(sig == SIG_IPI);
- }
- log_mode_after_vcpu_run(vcpu, ret, errno);
+ vcpu_run(vcpu);
+ log_mode_after_vcpu_run(vcpu);
}
- pr_info("Dirtied %"PRIu64" pages\n", pages_count);
-
return NULL;
}
-static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
+static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long **bmap)
{
+ uint64_t page, nr_dirty_pages = 0, nr_clean_pages = 0;
uint64_t step = vm_num_host_pages(mode, 1);
- uint64_t page;
- uint64_t *value_ptr;
- uint64_t min_iter = 0;
for (page = 0; page < host_num_pages; page += step) {
- value_ptr = host_test_mem + page * host_page_size;
-
- /* If this is a special page that we were tracking... */
- if (__test_and_clear_bit_le(page, host_bmap_track)) {
- host_track_next_count++;
- TEST_ASSERT(test_bit_le(page, bmap),
- "Page %"PRIu64" should have its dirty bit "
- "set in this iteration but it is missing",
- page);
- }
+ uint64_t val = *(uint64_t *)(host_test_mem + page * host_page_size);
+ bool bmap0_dirty = __test_and_clear_bit_le(page, bmap[0]);
- if (__test_and_clear_bit_le(page, bmap)) {
- bool matched;
-
- host_dirty_count++;
+ /*
+ * Ensure both bitmaps are cleared, as a page can be written
+ * multiple times per iteration, i.e. can show up in both
+ * bitmaps, and the dirty ring is additive, i.e. doesn't purge
+ * bitmap entries from previous collections.
+ */
+ if (__test_and_clear_bit_le(page, bmap[1]) || bmap0_dirty) {
+ nr_dirty_pages++;
/*
- * If the bit is set, the value written onto
- * the corresponding page should be either the
- * previous iteration number or the current one.
+ * If the page is dirty, the value written to memory
+ * should be the current iteration number.
*/
- matched = (*value_ptr == iteration ||
- *value_ptr == iteration - 1);
-
- if (host_log_mode == LOG_MODE_DIRTY_RING && !matched) {
- if (*value_ptr == iteration - 2 && min_iter <= iteration - 2) {
- /*
- * Short answer: this case is special
- * only for dirty ring test where the
- * page is the last page before a kvm
- * dirty ring full in iteration N-2.
- *
- * Long answer: Assuming ring size R,
- * one possible condition is:
- *
- * main thr vcpu thr
- * -------- --------
- * iter=1
- * write 1 to page 0~(R-1)
- * full, vmexit
- * collect 0~(R-1)
- * kick vcpu
- * write 1 to (R-1)~(2R-2)
- * full, vmexit
- * iter=2
- * collect (R-1)~(2R-2)
- * kick vcpu
- * write 1 to (2R-2)
- * (NOTE!!! "1" cached in cpu reg)
- * write 2 to (2R-1)~(3R-3)
- * full, vmexit
- * iter=3
- * collect (2R-2)~(3R-3)
- * (here if we read value on page
- * "2R-2" is 1, while iter=3!!!)
- *
- * This however can only happen once per iteration.
- */
- min_iter = iteration - 1;
+ if (val == iteration)
+ continue;
+
+ if (host_log_mode == LOG_MODE_DIRTY_RING) {
+ /*
+ * The last page in the ring from previous
+ * iteration can be written with the value
+ * from the previous iteration, as the value to
+ * be written may be cached in a CPU register.
+ */
+ if (page == dirty_ring_prev_iteration_last_page &&
+ val == iteration - 1)
continue;
- } else if (page == dirty_ring_last_page) {
- /*
- * Please refer to comments in
- * dirty_ring_last_page.
- */
+
+ /*
+ * Any value from a previous iteration is legal
+ * for the last entry, as the write may not yet
+ * have retired, i.e. the page may hold whatever
+ * it had before this iteration started.
+ */
+ if (page == dirty_ring_last_page &&
+ val < iteration)
continue;
- }
+ } else if (!val && iteration == 1 && bmap0_dirty) {
+ /*
+ * When testing get+clear, the dirty bitmap
+ * starts with all bits set, and so the first
+ * iteration can observe a "dirty" page that
+ * was never written, but only in the first
+ * bitmap (collecting the bitmap also clears
+ * all dirty pages).
+ */
+ continue;
}
- TEST_ASSERT(matched,
- "Set page %"PRIu64" value %"PRIu64
- " incorrect (iteration=%"PRIu64")",
- page, *value_ptr, iteration);
+ TEST_FAIL("Dirty page %lu value (%lu) != iteration (%lu) "
+ "(last = %lu, prev_last = %lu)",
+ page, val, iteration, dirty_ring_last_page,
+ dirty_ring_prev_iteration_last_page);
} else {
- host_clear_count++;
+ nr_clean_pages++;
/*
* If cleared, the value written can be any
- * value smaller or equals to the iteration
- * number. Note that the value can be exactly
- * (iteration-1) if that write can happen
- * like this:
- *
- * (1) increase loop count to "iteration-1"
- * (2) write to page P happens (with value
- * "iteration-1")
- * (3) get dirty log for "iteration-1"; we'll
- * see that page P bit is set (dirtied),
- * and not set the bit in host_bmap_track
- * (4) increase loop count to "iteration"
- * (which is current iteration)
- * (5) get dirty log for current iteration,
- * we'll see that page P is cleared, with
- * value "iteration-1".
+ * value smaller than the iteration number.
*/
- TEST_ASSERT(*value_ptr <= iteration,
- "Clear page %"PRIu64" value %"PRIu64
- " incorrect (iteration=%"PRIu64")",
- page, *value_ptr, iteration);
- if (*value_ptr == iteration) {
- /*
- * This page is _just_ modified; it
- * should report its dirtyness in the
- * next run
- */
- __set_bit_le(page, host_bmap_track);
- }
+ TEST_ASSERT(val < iteration,
+ "Clear page %lu value (%lu) >= iteration (%lu) "
+ "(last = %lu, prev_last = %lu)",
+ page, val, iteration, dirty_ring_last_page,
+ dirty_ring_prev_iteration_last_page);
}
}
+
+ pr_info("Iteration %2ld: dirty: %-6lu clean: %-6lu writes: %-6lu\n",
+ iteration, nr_dirty_pages, nr_clean_pages, nr_writes);
+
+ host_dirty_count += nr_dirty_pages;
+ host_clear_count += nr_clean_pages;
}
static struct kvm_vm *create_vm(enum vm_guest_mode mode, struct kvm_vcpu **vcpu,
@@ -688,7 +599,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
struct test_params *p = arg;
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- unsigned long *bmap;
+ unsigned long *bmap[2];
uint32_t ring_buf_idx = 0;
int sem_val;
@@ -731,12 +642,21 @@ static void run_test(enum vm_guest_mode mode, void *arg)
#ifdef __s390x__
/* Align to 1M (segment size) */
guest_test_phys_mem = align_down(guest_test_phys_mem, 1 << 20);
+
+ /*
+ * The workaround in guest_code() to write all pages prior to the first
+ * iteration isn't compatible with the dirty ring, as the dirty ring
+ * support relies on the vCPU to actually stop when vcpu_stop is set so
+ * that the vCPU doesn't hang waiting for the dirty ring to be emptied.
+ */
+ TEST_ASSERT(host_log_mode != LOG_MODE_DIRTY_RING,
+ "Test needs to be updated to support s390 dirty ring");
#endif
pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
- bmap = bitmap_zalloc(host_num_pages);
- host_bmap_track = bitmap_zalloc(host_num_pages);
+ bmap[0] = bitmap_zalloc(host_num_pages);
+ bmap[1] = bitmap_zalloc(host_num_pages);
/* Add an extra memory slot for testing dirty logging */
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
@@ -757,14 +677,9 @@ static void run_test(enum vm_guest_mode mode, void *arg)
sync_global_to_guest(vm, guest_test_virt_mem);
sync_global_to_guest(vm, guest_num_pages);
- /* Start the iterations */
- iteration = 1;
- sync_global_to_guest(vm, iteration);
- WRITE_ONCE(host_quit, false);
host_dirty_count = 0;
host_clear_count = 0;
- host_track_next_count = 0;
- WRITE_ONCE(dirty_ring_vcpu_ring_full, false);
+ WRITE_ONCE(host_quit, false);
/*
* Ensure the previous iteration didn't leave a dangling semaphore, i.e.
@@ -776,21 +691,95 @@ static void run_test(enum vm_guest_mode mode, void *arg)
sem_getvalue(&sem_vcpu_cont, &sem_val);
TEST_ASSERT_EQ(sem_val, 0);
+ TEST_ASSERT_EQ(vcpu_stop, false);
+
pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu);
- while (iteration < p->iterations) {
- /* Give the vcpu thread some time to dirty some pages */
- usleep(p->interval * 1000);
- log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX,
- bmap, host_num_pages,
- &ring_buf_idx);
+ for (iteration = 1; iteration <= p->iterations; iteration++) {
+ unsigned long i;
+
+ sync_global_to_guest(vm, iteration);
+
+ WRITE_ONCE(nr_writes, 0);
+ sync_global_to_guest(vm, nr_writes);
+
+ dirty_ring_prev_iteration_last_page = dirty_ring_last_page;
+ WRITE_ONCE(dirty_ring_vcpu_ring_full, false);
+
+ sem_post(&sem_vcpu_cont);
+
+ /*
+ * Let the vCPU run beyond the configured interval until it has
+ * performed the minimum number of writes. This verifies the
+ * guest is making forward progress, e.g. isn't stuck because
+ * of a KVM bug, and puts a firm floor on test coverage.
+ */
+ for (i = 0; i < p->interval || nr_writes < TEST_MIN_WRITES_PER_ITERATION; i++) {
+ /*
+ * Sleep in 1ms chunks to keep the interval math simple
+ * and so that the test doesn't run too far beyond the
+ * specified interval.
+ */
+ usleep(1000);
+
+ sync_global_from_guest(vm, nr_writes);
+
+ /*
+ * Reap dirty pages while the guest is running so that
+ * dirty ring full events are resolved, i.e. so that a
+ * larger interval doesn't always end up with a vCPU
+ * that's effectively blocked. Collecting while the
+ * guest is running also verifies KVM doesn't lose any
+ * state.
+ *
+ * For bitmap modes, KVM overwrites the entire bitmap,
+ * i.e. collecting the bitmaps is destructive. Collect
+ * the bitmap only on the first pass, otherwise this
+ * test would lose track of dirty pages.
+ */
+ if (i && host_log_mode != LOG_MODE_DIRTY_RING)
+ continue;
+
+ /*
+ * For the dirty ring, empty the ring on subsequent
+ * passes only if the ring was filled at least once,
+ * to verify KVM's handling of a full ring (emptying
+ * the ring on every pass would make it unlikely the
+ * vCPU would ever fill the fing).
+ */
+ if (i && !READ_ONCE(dirty_ring_vcpu_ring_full))
+ continue;
+
+ log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX,
+ bmap[0], host_num_pages,
+ &ring_buf_idx);
+ }
+
+ /*
+ * Stop the vCPU prior to collecting and verifying the dirty
+ * log. If the vCPU is allowed to run during collection, then
+ * pages that are written during this iteration may be missed,
+ * i.e. collected in the next iteration. And if the vCPU is
+ * writing memory during verification, pages that this thread
+ * sees as clean may be written with this iteration's value.
+ */
+ WRITE_ONCE(vcpu_stop, true);
+ sync_global_to_guest(vm, vcpu_stop);
+ sem_wait(&sem_vcpu_stop);
/*
- * See vcpu_sync_stop_requested definition for details on why
- * we need to stop vcpu when verify data.
+ * Clear vcpu_stop after the vCPU thread has acknowledge the
+ * stop request and is waiting, i.e. is definitely not running!
*/
- atomic_set(&vcpu_sync_stop_requested, true);
- sem_wait_until(&sem_vcpu_stop);
+ WRITE_ONCE(vcpu_stop, false);
+ sync_global_to_guest(vm, vcpu_stop);
+
+ /*
+ * Sync the number of writes performed before verification, the
+ * info will be printed along with the dirty/clean page counts.
+ */
+ sync_global_from_guest(vm, nr_writes);
+
/*
* NOTE: for dirty ring, it's possible that we didn't stop at
* GUEST_SYNC but instead we stopped because ring is full;
@@ -798,32 +787,22 @@ static void run_test(enum vm_guest_mode mode, void *arg)
* the flush of the last page, and since we handle the last
* page specially verification will succeed anyway.
*/
- assert(host_log_mode == LOG_MODE_DIRTY_RING ||
- atomic_read(&vcpu_sync_stop_requested) == false);
+ log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX,
+ bmap[1], host_num_pages,
+ &ring_buf_idx);
vm_dirty_log_verify(mode, bmap);
-
- /*
- * Set host_quit before sem_vcpu_cont in the final iteration to
- * ensure that the vCPU worker doesn't resume the guest. As
- * above, the dirty ring test may stop and wait even when not
- * explicitly request to do so, i.e. would hang waiting for a
- * "continue" if it's allowed to resume the guest.
- */
- if (++iteration == p->iterations)
- WRITE_ONCE(host_quit, true);
-
- sem_post(&sem_vcpu_cont);
- sync_global_to_guest(vm, iteration);
}
+ WRITE_ONCE(host_quit, true);
+ sem_post(&sem_vcpu_cont);
+
pthread_join(vcpu_thread, NULL);
- pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), "
- "track_next (%"PRIu64")\n", host_dirty_count, host_clear_count,
- host_track_next_count);
+ pr_info("Total bits checked: dirty (%lu), clear (%lu)\n",
+ host_dirty_count, host_clear_count);
- free(bmap);
- free(host_bmap_track);
+ free(bmap[0]);
+ free(bmap[1]);
kvm_vm_free(vm);
}
@@ -857,7 +836,6 @@ int main(int argc, char *argv[])
.interval = TEST_HOST_LOOP_INTERVAL,
};
int opt, i;
- sigset_t sigset;
sem_init(&sem_vcpu_stop, 0, 0);
sem_init(&sem_vcpu_cont, 0, 0);
@@ -908,19 +886,12 @@ int main(int argc, char *argv[])
}
}
- TEST_ASSERT(p.iterations > 2, "Iterations must be greater than two");
+ TEST_ASSERT(p.iterations > 0, "Iterations must be greater than zero");
TEST_ASSERT(p.interval > 0, "Interval must be greater than zero");
pr_info("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n",
p.iterations, p.interval);
- srandom(time(0));
-
- /* Ensure that vCPU threads start with SIG_IPI blocked. */
- sigemptyset(&sigset);
- sigaddset(&sigset, SIG_IPI);
- pthread_sigmask(SIG_BLOCK, &sigset, NULL);
-
if (host_log_mode_option == LOG_MODE_ALL) {
/* Run each log mode */
for (i = 0; i < LOG_MODE_NUM; i++) {
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 4c4e5a847f67..373912464fb4 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -46,6 +46,12 @@ struct userspace_mem_region {
struct hlist_node slot_node;
};
+struct kvm_binary_stats {
+ int fd;
+ struct kvm_stats_header header;
+ struct kvm_stats_desc *desc;
+};
+
struct kvm_vcpu {
struct list_head list;
uint32_t id;
@@ -55,6 +61,7 @@ struct kvm_vcpu {
#ifdef __x86_64__
struct kvm_cpuid2 *cpuid;
#endif
+ struct kvm_binary_stats stats;
struct kvm_dirty_gfn *dirty_gfns;
uint32_t fetch_index;
uint32_t dirty_gfns_count;
@@ -99,10 +106,7 @@ struct kvm_vm {
struct kvm_vm_arch arch;
- /* Cache of information for binary stats interface */
- int stats_fd;
- struct kvm_stats_header stats_header;
- struct kvm_stats_desc *stats_desc;
+ struct kvm_binary_stats stats;
/*
* KVM region slots. These are the default memslots used by page
@@ -531,16 +535,19 @@ void read_stat_data(int stats_fd, struct kvm_stats_header *header,
struct kvm_stats_desc *desc, uint64_t *data,
size_t max_elements);
-void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data,
- size_t max_elements);
+void kvm_get_stat(struct kvm_binary_stats *stats, const char *name,
+ uint64_t *data, size_t max_elements);
-static inline uint64_t vm_get_stat(struct kvm_vm *vm, const char *stat_name)
-{
- uint64_t data;
+#define __get_stat(stats, stat) \
+({ \
+ uint64_t data; \
+ \
+ kvm_get_stat(stats, #stat, &data, 1); \
+ data; \
+})
- __vm_get_stat(vm, stat_name, &data, 1);
- return data;
-}
+#define vm_get_stat(vm, stat) __get_stat(&(vm)->stats, stat)
+#define vcpu_get_stat(vcpu, stat) __get_stat(&(vcpu)->stats, stat)
void vm_create_irqchip(struct kvm_vm *vm);
@@ -963,6 +970,8 @@ static inline struct kvm_vm *vm_create_shape_with_one_vcpu(struct vm_shape shape
struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm);
+void kvm_set_files_rlimit(uint32_t nr_vcpus);
+
void kvm_pin_this_task_to_pcpu(uint32_t pcpu);
void kvm_print_vcpu_pinning_help(void);
void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index 3e473058849f..77d13d7920cb 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -22,7 +22,7 @@
#define msecs_to_usecs(msec) ((msec) * 1000ULL)
-static inline int _no_printf(const char *format, ...) { return 0; }
+static inline __printf(1, 2) int _no_printf(const char *format, ...) { return 0; }
#ifdef DEBUG
#define pr_debug(...) printf(__VA_ARGS__)
diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h
index d60da8966772..32ab6ca7ec32 100644
--- a/tools/testing/selftests/kvm/include/x86/processor.h
+++ b/tools/testing/selftests/kvm/include/x86/processor.h
@@ -183,6 +183,9 @@ struct kvm_x86_cpu_feature {
* Extended Leafs, a.k.a. AMD defined
*/
#define X86_FEATURE_SVM KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 2)
+#define X86_FEATURE_PERFCTR_CORE KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 23)
+#define X86_FEATURE_PERFCTR_NB KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 24)
+#define X86_FEATURE_PERFCTR_LLC KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 28)
#define X86_FEATURE_NX KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 20)
#define X86_FEATURE_GBPAGES KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 26)
#define X86_FEATURE_RDTSCP KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 27)
@@ -197,8 +200,11 @@ struct kvm_x86_cpu_feature {
#define X86_FEATURE_PAUSEFILTER KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 10)
#define X86_FEATURE_PFTHRESHOLD KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 12)
#define X86_FEATURE_VGIF KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 16)
+#define X86_FEATURE_IDLE_HLT KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 30)
#define X86_FEATURE_SEV KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 1)
#define X86_FEATURE_SEV_ES KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 3)
+#define X86_FEATURE_PERFMON_V2 KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 0)
+#define X86_FEATURE_LBR_PMC_FREEZE KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 2)
/*
* KVM defined paravirt features.
@@ -285,6 +291,8 @@ struct kvm_x86_cpu_property {
#define X86_PROPERTY_GUEST_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 16, 23)
#define X86_PROPERTY_SEV_C_BIT KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 0, 5)
#define X86_PROPERTY_PHYS_ADDR_REDUCTION KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11)
+#define X86_PROPERTY_NR_PERFCTR_CORE KVM_X86_CPU_PROPERTY(0x80000022, 0, EBX, 0, 3)
+#define X86_PROPERTY_NR_PERFCTR_NB KVM_X86_CPU_PROPERTY(0x80000022, 0, EBX, 10, 15)
#define X86_PROPERTY_MAX_CENTAUR_LEAF KVM_X86_CPU_PROPERTY(0xC0000000, 0, EAX, 0, 31)
@@ -1244,7 +1252,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector,
uint64_t ign_error_code; \
uint8_t vector; \
\
- asm volatile(KVM_ASM_SAFE(insn) \
+ asm volatile(KVM_ASM_SAFE_FEP(insn) \
: KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \
: inputs \
: KVM_ASM_SAFE_CLOBBERS); \
@@ -1339,6 +1347,46 @@ static inline void kvm_hypercall_map_gpa_range(uint64_t gpa, uint64_t size,
GUEST_ASSERT(!ret);
}
+/*
+ * Execute HLT in an STI interrupt shadow to ensure that a pending IRQ that's
+ * intended to be a wake event arrives *after* HLT is executed. Modern CPUs,
+ * except for a few oddballs that KVM is unlikely to run on, block IRQs for one
+ * instruction after STI, *if* RFLAGS.IF=0 before STI. Note, Intel CPUs may
+ * block other events beyond regular IRQs, e.g. may block NMIs and SMIs too.
+ */
+static inline void safe_halt(void)
+{
+ asm volatile("sti; hlt");
+}
+
+/*
+ * Enable interrupts and ensure that interrupts are evaluated upon return from
+ * this function, i.e. execute a nop to consume the STi interrupt shadow.
+ */
+static inline void sti_nop(void)
+{
+ asm volatile ("sti; nop");
+}
+
+/*
+ * Enable interrupts for one instruction (nop), to allow the CPU to process all
+ * interrupts that are already pending.
+ */
+static inline void sti_nop_cli(void)
+{
+ asm volatile ("sti; nop; cli");
+}
+
+static inline void sti(void)
+{
+ asm volatile("sti");
+}
+
+static inline void cli(void)
+{
+ asm volatile ("cli");
+}
+
void __vm_xsave_require_permission(uint64_t xfeature, const char *name);
#define vm_xsave_require_permission(xfeature) \
diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
index c78f34699f73..c5310736ed06 100644
--- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
+++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
@@ -10,7 +10,6 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <sys/resource.h>
#include "test_util.h"
@@ -39,36 +38,11 @@ int main(int argc, char *argv[])
{
int kvm_max_vcpu_id = kvm_check_cap(KVM_CAP_MAX_VCPU_ID);
int kvm_max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
- /*
- * Number of file descriptors reqired, KVM_CAP_MAX_VCPUS for vCPU fds +
- * an arbitrary number for everything else.
- */
- int nr_fds_wanted = kvm_max_vcpus + 100;
- struct rlimit rl;
pr_info("KVM_CAP_MAX_VCPU_ID: %d\n", kvm_max_vcpu_id);
pr_info("KVM_CAP_MAX_VCPUS: %d\n", kvm_max_vcpus);
- /*
- * Check that we're allowed to open nr_fds_wanted file descriptors and
- * try raising the limits if needed.
- */
- TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!");
-
- if (rl.rlim_cur < nr_fds_wanted) {
- rl.rlim_cur = nr_fds_wanted;
- if (rl.rlim_max < nr_fds_wanted) {
- int old_rlim_max = rl.rlim_max;
- rl.rlim_max = nr_fds_wanted;
-
- int r = setrlimit(RLIMIT_NOFILE, &rl);
- __TEST_REQUIRE(r >= 0,
- "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)",
- old_rlim_max, nr_fds_wanted);
- } else {
- TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!");
- }
- }
+ kvm_set_files_rlimit(kvm_max_vcpus);
/*
* Upstream KVM prior to 4.8 does not support KVM_CAP_MAX_VCPU_ID.
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 33fefeb3ca44..815bc45dd8dc 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -12,6 +12,7 @@
#include <assert.h>
#include <sched.h>
#include <sys/mman.h>
+#include <sys/resource.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
@@ -196,6 +197,11 @@ static void vm_open(struct kvm_vm *vm)
vm->fd = __kvm_ioctl(vm->kvm_fd, KVM_CREATE_VM, (void *)vm->type);
TEST_ASSERT(vm->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm->fd));
+
+ if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD))
+ vm->stats.fd = vm_get_stats_fd(vm);
+ else
+ vm->stats.fd = -1;
}
const char *vm_guest_mode_string(uint32_t i)
@@ -406,6 +412,38 @@ static uint64_t vm_nr_pages_required(enum vm_guest_mode mode,
return vm_adjust_num_guest_pages(mode, nr_pages);
}
+void kvm_set_files_rlimit(uint32_t nr_vcpus)
+{
+ /*
+ * Each vCPU will open two file descriptors: the vCPU itself and the
+ * vCPU's binary stats file descriptor. Add an arbitrary amount of
+ * buffer for all other files a test may open.
+ */
+ int nr_fds_wanted = nr_vcpus * 2 + 100;
+ struct rlimit rl;
+
+ /*
+ * Check that we're allowed to open nr_fds_wanted file descriptors and
+ * try raising the limits if needed.
+ */
+ TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!");
+
+ if (rl.rlim_cur < nr_fds_wanted) {
+ rl.rlim_cur = nr_fds_wanted;
+ if (rl.rlim_max < nr_fds_wanted) {
+ int old_rlim_max = rl.rlim_max;
+
+ rl.rlim_max = nr_fds_wanted;
+ __TEST_REQUIRE(setrlimit(RLIMIT_NOFILE, &rl) >= 0,
+ "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)",
+ old_rlim_max, nr_fds_wanted);
+ } else {
+ TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!");
+ }
+ }
+
+}
+
struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
uint64_t nr_extra_pages)
{
@@ -415,6 +453,8 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
struct kvm_vm *vm;
int i;
+ kvm_set_files_rlimit(nr_runnable_vcpus);
+
pr_debug("%s: mode='%s' type='%d', pages='%ld'\n", __func__,
vm_guest_mode_string(shape.mode), shape.type, nr_pages);
@@ -657,6 +697,23 @@ userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
return NULL;
}
+static void kvm_stats_release(struct kvm_binary_stats *stats)
+{
+ int ret;
+
+ if (stats->fd < 0)
+ return;
+
+ if (stats->desc) {
+ free(stats->desc);
+ stats->desc = NULL;
+ }
+
+ ret = close(stats->fd);
+ TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
+ stats->fd = -1;
+}
+
__weak void vcpu_arch_free(struct kvm_vcpu *vcpu)
{
@@ -690,6 +747,8 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
ret = close(vcpu->fd);
TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
+ kvm_stats_release(&vcpu->stats);
+
list_del(&vcpu->list);
vcpu_arch_free(vcpu);
@@ -709,6 +768,9 @@ void kvm_vm_release(struct kvm_vm *vmp)
ret = close(vmp->kvm_fd);
TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
+
+ /* Free cached stats metadata and close FD */
+ kvm_stats_release(&vmp->stats);
}
static void __vm_mem_region_delete(struct kvm_vm *vm,
@@ -748,12 +810,6 @@ void kvm_vm_free(struct kvm_vm *vmp)
if (vmp == NULL)
return;
- /* Free cached stats metadata and close FD */
- if (vmp->stats_fd) {
- free(vmp->stats_desc);
- close(vmp->stats_fd);
- }
-
/* Free userspace_mem_regions. */
hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node)
__vm_mem_region_delete(vmp, region);
@@ -1286,6 +1342,11 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
TEST_ASSERT(vcpu->run != MAP_FAILED,
__KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));
+ if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD))
+ vcpu->stats.fd = vcpu_get_stats_fd(vcpu);
+ else
+ vcpu->stats.fd = -1;
+
/* Add to linked-list of VCPUs. */
list_add(&vcpu->list, &vm->vcpus);
@@ -1958,9 +2019,8 @@ static struct exit_reason {
KVM_EXIT_STRING(RISCV_SBI),
KVM_EXIT_STRING(RISCV_CSR),
KVM_EXIT_STRING(NOTIFY),
-#ifdef KVM_EXIT_MEMORY_NOT_PRESENT
- KVM_EXIT_STRING(MEMORY_NOT_PRESENT),
-#endif
+ KVM_EXIT_STRING(LOONGARCH_IOCSR),
+ KVM_EXIT_STRING(MEMORY_FAULT),
};
/*
@@ -2198,46 +2258,31 @@ void read_stat_data(int stats_fd, struct kvm_stats_header *header,
desc->name, size, ret);
}
-/*
- * Read the data of the named stat
- *
- * Input Args:
- * vm - the VM for which the stat should be read
- * stat_name - the name of the stat to read
- * max_elements - the maximum number of 8-byte values to read into data
- *
- * Output Args:
- * data - the buffer into which stat data should be read
- *
- * Read the data values of a specified stat from the binary stats interface.
- */
-void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data,
- size_t max_elements)
+void kvm_get_stat(struct kvm_binary_stats *stats, const char *name,
+ uint64_t *data, size_t max_elements)
{
struct kvm_stats_desc *desc;
size_t size_desc;
int i;
- if (!vm->stats_fd) {
- vm->stats_fd = vm_get_stats_fd(vm);
- read_stats_header(vm->stats_fd, &vm->stats_header);
- vm->stats_desc = read_stats_descriptors(vm->stats_fd,
- &vm->stats_header);
+ if (!stats->desc) {
+ read_stats_header(stats->fd, &stats->header);
+ stats->desc = read_stats_descriptors(stats->fd, &stats->header);
}
- size_desc = get_stats_descriptor_size(&vm->stats_header);
+ size_desc = get_stats_descriptor_size(&stats->header);
- for (i = 0; i < vm->stats_header.num_desc; ++i) {
- desc = (void *)vm->stats_desc + (i * size_desc);
+ for (i = 0; i < stats->header.num_desc; ++i) {
+ desc = (void *)stats->desc + (i * size_desc);
- if (strcmp(desc->name, stat_name))
+ if (strcmp(desc->name, name))
continue;
- read_stat_data(vm->stats_fd, &vm->stats_header, desc,
- data, max_elements);
-
- break;
+ read_stat_data(stats->fd, &stats->header, desc, data, max_elements);
+ return;
}
+
+ TEST_FAIL("Unable to find stat '%s'", name);
}
__weak void kvm_arch_vm_post_create(struct kvm_vm *vm)
diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c
index 7c9de8414462..5bde176cedd5 100644
--- a/tools/testing/selftests/kvm/lib/userfaultfd_util.c
+++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c
@@ -114,7 +114,7 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n",
is_minor ? "MINOR" : "MISSING",
- is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY");
+ is_minor ? "UFFDIO_CONTINUE" : "UFFDIO_COPY");
uffd_desc = malloc(sizeof(struct uffd_desc));
TEST_ASSERT(uffd_desc, "Failed to malloc uffd descriptor");
diff --git a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
index f45c0ecc902d..03406de4989d 100644
--- a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
+++ b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
@@ -39,7 +39,13 @@ static bool illegal_handler_invoked;
#define SBI_PMU_TEST_SNAPSHOT BIT(2)
#define SBI_PMU_TEST_OVERFLOW BIT(3)
-static int disabled_tests;
+#define SBI_PMU_OVERFLOW_IRQNUM_DEFAULT 5
+struct test_args {
+ int disabled_tests;
+ int overflow_irqnum;
+};
+
+static struct test_args targs;
unsigned long pmu_csr_read_num(int csr_num)
{
@@ -118,8 +124,8 @@ static void stop_counter(unsigned long counter, unsigned long stop_flags)
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, counter, 1, stop_flags,
0, 0, 0);
- __GUEST_ASSERT(ret.error == 0, "Unable to stop counter %ld error %ld\n",
- counter, ret.error);
+ __GUEST_ASSERT(ret.error == 0 || ret.error == SBI_ERR_ALREADY_STOPPED,
+ "Unable to stop counter %ld error %ld\n", counter, ret.error);
}
static void guest_illegal_exception_handler(struct ex_regs *regs)
@@ -137,7 +143,6 @@ static void guest_irq_handler(struct ex_regs *regs)
unsigned int irq_num = regs->cause & ~CAUSE_IRQ_FLAG;
struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva;
unsigned long overflown_mask;
- unsigned long counter_val = 0;
/* Validate that we are in the correct irq handler */
GUEST_ASSERT_EQ(irq_num, IRQ_PMU_OVF);
@@ -151,10 +156,6 @@ static void guest_irq_handler(struct ex_regs *regs)
GUEST_ASSERT(overflown_mask & 0x01);
WRITE_ONCE(vcpu_shared_irq_count, vcpu_shared_irq_count+1);
-
- counter_val = READ_ONCE(snapshot_data->ctr_values[0]);
- /* Now start the counter to mimick the real driver behavior */
- start_counter(counter_in_use, SBI_PMU_START_FLAG_SET_INIT_VALUE, counter_val);
}
static unsigned long get_counter_index(unsigned long cbase, unsigned long cmask,
@@ -479,7 +480,7 @@ static void test_pmu_events_snaphost(void)
static void test_pmu_events_overflow(void)
{
- int num_counters = 0;
+ int num_counters = 0, i = 0;
/* Verify presence of SBI PMU and minimum requrired SBI version */
verify_sbi_requirement_assert();
@@ -496,11 +497,15 @@ static void test_pmu_events_overflow(void)
* Qemu supports overflow for cycle/instruction.
* This test may fail on any platform that do not support overflow for these two events.
*/
- test_pmu_event_overflow(SBI_PMU_HW_CPU_CYCLES);
- GUEST_ASSERT_EQ(vcpu_shared_irq_count, 1);
+ for (i = 0; i < targs.overflow_irqnum; i++)
+ test_pmu_event_overflow(SBI_PMU_HW_CPU_CYCLES);
+ GUEST_ASSERT_EQ(vcpu_shared_irq_count, targs.overflow_irqnum);
+
+ vcpu_shared_irq_count = 0;
- test_pmu_event_overflow(SBI_PMU_HW_INSTRUCTIONS);
- GUEST_ASSERT_EQ(vcpu_shared_irq_count, 2);
+ for (i = 0; i < targs.overflow_irqnum; i++)
+ test_pmu_event_overflow(SBI_PMU_HW_INSTRUCTIONS);
+ GUEST_ASSERT_EQ(vcpu_shared_irq_count, targs.overflow_irqnum);
GUEST_DONE();
}
@@ -609,7 +614,11 @@ static void test_vm_events_overflow(void *guest_code)
vcpu_init_vector_tables(vcpu);
/* Initialize guest timer frequency. */
timer_freq = vcpu_get_reg(vcpu, RISCV_TIMER_REG(frequency));
+
+ /* Export the shared variables to the guest */
sync_global_to_guest(vm, timer_freq);
+ sync_global_to_guest(vm, vcpu_shared_irq_count);
+ sync_global_to_guest(vm, targs);
run_vcpu(vcpu);
@@ -618,28 +627,38 @@ static void test_vm_events_overflow(void *guest_code)
static void test_print_help(char *name)
{
- pr_info("Usage: %s [-h] [-d <test name>]\n", name);
- pr_info("\t-d: Test to disable. Available tests are 'basic', 'events', 'snapshot', 'overflow'\n");
+ pr_info("Usage: %s [-h] [-t <test name>] [-n <number of LCOFI interrupt for overflow test>]\n",
+ name);
+ pr_info("\t-t: Test to run (default all). Available tests are 'basic', 'events', 'snapshot', 'overflow'\n");
+ pr_info("\t-n: Number of LCOFI interrupt to trigger for each event in overflow test (default: %d)\n",
+ SBI_PMU_OVERFLOW_IRQNUM_DEFAULT);
pr_info("\t-h: print this help screen\n");
}
static bool parse_args(int argc, char *argv[])
{
int opt;
+ int temp_disabled_tests = SBI_PMU_TEST_BASIC | SBI_PMU_TEST_EVENTS | SBI_PMU_TEST_SNAPSHOT |
+ SBI_PMU_TEST_OVERFLOW;
+ int overflow_interrupts = 0;
- while ((opt = getopt(argc, argv, "hd:")) != -1) {
+ while ((opt = getopt(argc, argv, "ht:n:")) != -1) {
switch (opt) {
- case 'd':
+ case 't':
if (!strncmp("basic", optarg, 5))
- disabled_tests |= SBI_PMU_TEST_BASIC;
+ temp_disabled_tests &= ~SBI_PMU_TEST_BASIC;
else if (!strncmp("events", optarg, 6))
- disabled_tests |= SBI_PMU_TEST_EVENTS;
+ temp_disabled_tests &= ~SBI_PMU_TEST_EVENTS;
else if (!strncmp("snapshot", optarg, 8))
- disabled_tests |= SBI_PMU_TEST_SNAPSHOT;
+ temp_disabled_tests &= ~SBI_PMU_TEST_SNAPSHOT;
else if (!strncmp("overflow", optarg, 8))
- disabled_tests |= SBI_PMU_TEST_OVERFLOW;
+ temp_disabled_tests &= ~SBI_PMU_TEST_OVERFLOW;
else
goto done;
+ targs.disabled_tests = temp_disabled_tests;
+ break;
+ case 'n':
+ overflow_interrupts = atoi_positive("Number of LCOFI", optarg);
break;
case 'h':
default:
@@ -647,6 +666,15 @@ static bool parse_args(int argc, char *argv[])
}
}
+ if (overflow_interrupts > 0) {
+ if (targs.disabled_tests & SBI_PMU_TEST_OVERFLOW) {
+ pr_info("-n option is only available for overflow test\n");
+ goto done;
+ } else {
+ targs.overflow_irqnum = overflow_interrupts;
+ }
+ }
+
return true;
done:
test_print_help(argv[0]);
@@ -655,25 +683,28 @@ done:
int main(int argc, char *argv[])
{
+ targs.disabled_tests = 0;
+ targs.overflow_irqnum = SBI_PMU_OVERFLOW_IRQNUM_DEFAULT;
+
if (!parse_args(argc, argv))
exit(KSFT_SKIP);
- if (!(disabled_tests & SBI_PMU_TEST_BASIC)) {
+ if (!(targs.disabled_tests & SBI_PMU_TEST_BASIC)) {
test_vm_basic_test(test_pmu_basic_sanity);
pr_info("SBI PMU basic test : PASS\n");
}
- if (!(disabled_tests & SBI_PMU_TEST_EVENTS)) {
+ if (!(targs.disabled_tests & SBI_PMU_TEST_EVENTS)) {
test_vm_events_test(test_pmu_events);
pr_info("SBI PMU event verification test : PASS\n");
}
- if (!(disabled_tests & SBI_PMU_TEST_SNAPSHOT)) {
+ if (!(targs.disabled_tests & SBI_PMU_TEST_SNAPSHOT)) {
test_vm_events_snapshot_test(test_pmu_events_snaphost);
pr_info("SBI PMU event verification with snapshot test : PASS\n");
}
- if (!(disabled_tests & SBI_PMU_TEST_OVERFLOW)) {
+ if (!(targs.disabled_tests & SBI_PMU_TEST_OVERFLOW)) {
test_vm_events_overflow(test_pmu_events_overflow);
pr_info("SBI PMU event verification with overflow test : PASS\n");
}
diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c
index e5898678bfab..1375fca80bcd 100644
--- a/tools/testing/selftests/kvm/rseq_test.c
+++ b/tools/testing/selftests/kvm/rseq_test.c
@@ -196,25 +196,27 @@ static void calc_min_max_cpu(void)
static void help(const char *name)
{
puts("");
- printf("usage: %s [-h] [-u]\n", name);
+ printf("usage: %s [-h] [-u] [-l latency]\n", name);
printf(" -u: Don't sanity check the number of successful KVM_RUNs\n");
+ printf(" -l: Set /dev/cpu_dma_latency to suppress deep sleep states\n");
puts("");
exit(0);
}
int main(int argc, char *argv[])
{
+ int r, i, snapshot, opt, fd = -1, latency = -1;
bool skip_sanity_check = false;
- int r, i, snapshot;
struct kvm_vm *vm;
struct kvm_vcpu *vcpu;
u32 cpu, rseq_cpu;
- int opt;
- while ((opt = getopt(argc, argv, "hu")) != -1) {
+ while ((opt = getopt(argc, argv, "hl:u")) != -1) {
switch (opt) {
case 'u':
skip_sanity_check = true;
+ case 'l':
+ latency = atoi_paranoid(optarg);
break;
case 'h':
default:
@@ -243,6 +245,20 @@ int main(int argc, char *argv[])
pthread_create(&migration_thread, NULL, migration_worker,
(void *)(unsigned long)syscall(SYS_gettid));
+ if (latency >= 0) {
+ /*
+ * Writes to cpu_dma_latency persist only while the file is
+ * open, i.e. it allows userspace to provide guaranteed latency
+ * while running a workload. Keep the file open until the test
+ * completes, otherwise writing cpu_dma_latency is meaningless.
+ */
+ fd = open("/dev/cpu_dma_latency", O_RDWR);
+ TEST_ASSERT(fd >= 0, __KVM_SYSCALL_ERROR("open() /dev/cpu_dma_latency", fd));
+
+ r = write(fd, &latency, 4);
+ TEST_ASSERT(r >= 1, "Error setting /dev/cpu_dma_latency");
+ }
+
for (i = 0; !done; i++) {
vcpu_run(vcpu);
TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
@@ -278,6 +294,9 @@ int main(int argc, char *argv[])
"rseq CPU = %d, sched CPU = %d", rseq_cpu, cpu);
}
+ if (fd > 0)
+ close(fd);
+
/*
* Sanity check that the test was able to enter the guest a reasonable
* number of times, e.g. didn't get stalled too often/long waiting for
@@ -293,8 +312,8 @@ int main(int argc, char *argv[])
TEST_ASSERT(skip_sanity_check || i > (NR_TASK_MIGRATIONS / 2),
"Only performed %d KVM_RUNs, task stalled too much?\n\n"
" Try disabling deep sleep states to reduce CPU wakeup latency,\n"
- " e.g. via cpuidle.off=1 or setting /dev/cpu_dma_latency to '0',\n"
- " or run with -u to disable this sanity check.", i);
+ " e.g. via cpuidle.off=1 or via -l <latency>, or run with -u to\n"
+ " disable this sanity check.", i);
pthread_join(migration_thread, NULL);
diff --git a/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c
index 2929c067c207..b0d2b04a7ff2 100644
--- a/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c
+++ b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c
@@ -41,9 +41,9 @@ struct kvm_page_stats {
static void get_page_stats(struct kvm_vm *vm, struct kvm_page_stats *stats, const char *stage)
{
- stats->pages_4k = vm_get_stat(vm, "pages_4k");
- stats->pages_2m = vm_get_stat(vm, "pages_2m");
- stats->pages_1g = vm_get_stat(vm, "pages_1g");
+ stats->pages_4k = vm_get_stat(vm, pages_4k);
+ stats->pages_2m = vm_get_stat(vm, pages_2m);
+ stats->pages_1g = vm_get_stat(vm, pages_1g);
stats->hugepages = stats->pages_2m + stats->pages_1g;
pr_debug("\nPage stats after %s: 4K: %ld 2M: %ld 1G: %ld huge: %ld\n",
diff --git a/tools/testing/selftests/kvm/x86/hyperv_ipi.c b/tools/testing/selftests/kvm/x86/hyperv_ipi.c
index 22c0c124582f..2b5b4bc6ef7e 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_ipi.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_ipi.c
@@ -63,8 +63,10 @@ static void receiver_code(void *hcall_page, vm_vaddr_t pgs_gpa)
/* Signal sender vCPU we're ready */
ipis_rcvd[vcpu_id] = (u64)-1;
- for (;;)
- asm volatile("sti; hlt; cli");
+ for (;;) {
+ safe_halt();
+ cli();
+ }
}
static void guest_ipi_handler(struct ex_regs *regs)
diff --git a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
index 2b550eff35f1..390ae2d87493 100644
--- a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
+++ b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
@@ -7,6 +7,7 @@
#include "kvm_util.h"
#include "processor.h"
+#include "kselftest.h"
#define CPUID_MWAIT (1u << 3)
@@ -14,6 +15,8 @@ enum monitor_mwait_testcases {
MWAIT_QUIRK_DISABLED = BIT(0),
MISC_ENABLES_QUIRK_DISABLED = BIT(1),
MWAIT_DISABLED = BIT(2),
+ CPUID_DISABLED = BIT(3),
+ TEST_MAX = CPUID_DISABLED * 2 - 1,
};
/*
@@ -35,11 +38,19 @@ do { \
testcase, vector); \
} while (0)
-static void guest_monitor_wait(int testcase)
+static void guest_monitor_wait(void *arg)
{
+ int testcase = (int) (long) arg;
u8 vector;
- GUEST_SYNC(testcase);
+ u64 val = rdmsr(MSR_IA32_MISC_ENABLE) & ~MSR_IA32_MISC_ENABLE_MWAIT;
+ if (!(testcase & MWAIT_DISABLED))
+ val |= MSR_IA32_MISC_ENABLE_MWAIT;
+ wrmsr(MSR_IA32_MISC_ENABLE, val);
+
+ __GUEST_ASSERT(this_cpu_has(X86_FEATURE_MWAIT) == !(testcase & MWAIT_DISABLED),
+ "Expected CPUID.MWAIT %s\n",
+ (testcase & MWAIT_DISABLED) ? "cleared" : "set");
/*
* Arbitrarily MONITOR this function, SVM performs fault checks before
@@ -50,19 +61,6 @@ static void guest_monitor_wait(int testcase)
vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0));
GUEST_ASSERT_MONITOR_MWAIT("MWAIT", testcase, vector);
-}
-
-static void guest_code(void)
-{
- guest_monitor_wait(MWAIT_DISABLED);
-
- guest_monitor_wait(MWAIT_QUIRK_DISABLED | MWAIT_DISABLED);
-
- guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_DISABLED);
- guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED);
-
- guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED | MWAIT_DISABLED);
- guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED);
GUEST_DONE();
}
@@ -74,56 +72,64 @@ int main(int argc, char *argv[])
struct kvm_vm *vm;
struct ucall uc;
int testcase;
+ char test[80];
- TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT));
TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2));
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
+ ksft_print_header();
+ ksft_set_plan(12);
+ for (testcase = 0; testcase <= TEST_MAX; testcase++) {
+ vm = vm_create_with_one_vcpu(&vcpu, guest_monitor_wait);
+ vcpu_args_set(vcpu, 1, (void *)(long)testcase);
+
+ disabled_quirks = 0;
+ if (testcase & MWAIT_QUIRK_DISABLED) {
+ disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS;
+ strcpy(test, "MWAIT can fault");
+ } else {
+ strcpy(test, "MWAIT never faults");
+ }
+ if (testcase & MISC_ENABLES_QUIRK_DISABLED) {
+ disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT;
+ strcat(test, ", MISC_ENABLE updates CPUID");
+ } else {
+ strcat(test, ", no CPUID updates");
+ }
+
+ vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks);
+
+ if (!(testcase & MISC_ENABLES_QUIRK_DISABLED) &&
+ (!!(testcase & CPUID_DISABLED) ^ !!(testcase & MWAIT_DISABLED)))
+ continue;
+
+ if (testcase & CPUID_DISABLED) {
+ strcat(test, ", CPUID clear");
+ vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
+ } else {
+ strcat(test, ", CPUID set");
+ vcpu_set_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
+ }
+
+ if (testcase & MWAIT_DISABLED)
+ strcat(test, ", MWAIT disabled");
- while (1) {
vcpu_run(vcpu);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
switch (get_ucall(vcpu, &uc)) {
- case UCALL_SYNC:
- testcase = uc.args[1];
- break;
case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- goto done;
+ /* Detected in vcpu_run */
+ break;
case UCALL_DONE:
- goto done;
+ ksft_test_result_pass("%s\n", test);
+ break;
default:
TEST_FAIL("Unknown ucall %lu", uc.cmd);
- goto done;
- }
-
- disabled_quirks = 0;
- if (testcase & MWAIT_QUIRK_DISABLED)
- disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS;
- if (testcase & MISC_ENABLES_QUIRK_DISABLED)
- disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT;
- vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks);
-
- /*
- * If the MISC_ENABLES quirk (KVM neglects to update CPUID to
- * enable/disable MWAIT) is disabled, toggle the ENABLE_MWAIT
- * bit in MISC_ENABLES accordingly. If the quirk is enabled,
- * the only valid configuration is MWAIT disabled, as CPUID
- * can't be manually changed after running the vCPU.
- */
- if (!(testcase & MISC_ENABLES_QUIRK_DISABLED)) {
- TEST_ASSERT(testcase & MWAIT_DISABLED,
- "Can't toggle CPUID features after running vCPU");
- continue;
+ break;
}
-
- vcpu_set_msr(vcpu, MSR_IA32_MISC_ENABLE,
- (testcase & MWAIT_DISABLED) ? 0 : MSR_IA32_MISC_ENABLE_MWAIT);
+ kvm_vm_free(vm);
}
+ ksft_finished();
-done:
- kvm_vm_free(vm);
return 0;
}
diff --git a/tools/testing/selftests/kvm/x86/nested_emulation_test.c b/tools/testing/selftests/kvm/x86/nested_emulation_test.c
new file mode 100644
index 000000000000..abc824dba04f
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/nested_emulation_test.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+#include "svm_util.h"
+
+enum {
+ SVM_F,
+ VMX_F,
+ NR_VIRTUALIZATION_FLAVORS,
+};
+
+struct emulated_instruction {
+ const char name[32];
+ uint8_t opcode[15];
+ uint32_t exit_reason[NR_VIRTUALIZATION_FLAVORS];
+};
+
+static struct emulated_instruction instructions[] = {
+ {
+ .name = "pause",
+ .opcode = { 0xf3, 0x90 },
+ .exit_reason = { SVM_EXIT_PAUSE,
+ EXIT_REASON_PAUSE_INSTRUCTION, }
+ },
+ {
+ .name = "hlt",
+ .opcode = { 0xf4 },
+ .exit_reason = { SVM_EXIT_HLT,
+ EXIT_REASON_HLT, }
+ },
+};
+
+static uint8_t kvm_fep[] = { 0x0f, 0x0b, 0x6b, 0x76, 0x6d }; /* ud2 ; .ascii "kvm" */
+static uint8_t l2_guest_code[sizeof(kvm_fep) + 15];
+static uint8_t *l2_instruction = &l2_guest_code[sizeof(kvm_fep)];
+
+static uint32_t get_instruction_length(struct emulated_instruction *insn)
+{
+ uint32_t i;
+
+ for (i = 0; i < ARRAY_SIZE(insn->opcode) && insn->opcode[i]; i++)
+ ;
+
+ return i;
+}
+
+static void guest_code(void *test_data)
+{
+ int f = this_cpu_has(X86_FEATURE_SVM) ? SVM_F : VMX_F;
+ int i;
+
+ memcpy(l2_guest_code, kvm_fep, sizeof(kvm_fep));
+
+ if (f == SVM_F) {
+ struct svm_test_data *svm = test_data;
+ struct vmcb *vmcb = svm->vmcb;
+
+ generic_svm_setup(svm, NULL, NULL);
+ vmcb->save.idtr.limit = 0;
+ vmcb->save.rip = (u64)l2_guest_code;
+
+ vmcb->control.intercept |= BIT_ULL(INTERCEPT_SHUTDOWN) |
+ BIT_ULL(INTERCEPT_PAUSE) |
+ BIT_ULL(INTERCEPT_HLT);
+ vmcb->control.intercept_exceptions = 0;
+ } else {
+ GUEST_ASSERT(prepare_for_vmx_operation(test_data));
+ GUEST_ASSERT(load_vmcs(test_data));
+
+ prepare_vmcs(test_data, NULL, NULL);
+ GUEST_ASSERT(!vmwrite(GUEST_IDTR_LIMIT, 0));
+ GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code));
+ GUEST_ASSERT(!vmwrite(EXCEPTION_BITMAP, 0));
+
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) |
+ CPU_BASED_PAUSE_EXITING |
+ CPU_BASED_HLT_EXITING);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(instructions); i++) {
+ struct emulated_instruction *insn = &instructions[i];
+ uint32_t insn_len = get_instruction_length(insn);
+ uint32_t exit_insn_len;
+ u32 exit_reason;
+
+ /*
+ * Copy the target instruction to the L2 code stream, and fill
+ * the remaining bytes with INT3s so that a missed intercept
+ * results in a consistent failure mode (SHUTDOWN).
+ */
+ memcpy(l2_instruction, insn->opcode, insn_len);
+ memset(l2_instruction + insn_len, 0xcc, sizeof(insn->opcode) - insn_len);
+
+ if (f == SVM_F) {
+ struct svm_test_data *svm = test_data;
+ struct vmcb *vmcb = svm->vmcb;
+
+ run_guest(vmcb, svm->vmcb_gpa);
+ exit_reason = vmcb->control.exit_code;
+ exit_insn_len = vmcb->control.next_rip - vmcb->save.rip;
+ GUEST_ASSERT_EQ(vmcb->save.rip, (u64)l2_instruction);
+ } else {
+ GUEST_ASSERT_EQ(i ? vmresume() : vmlaunch(), 0);
+ exit_reason = vmreadz(VM_EXIT_REASON);
+ exit_insn_len = vmreadz(VM_EXIT_INSTRUCTION_LEN);
+ GUEST_ASSERT_EQ(vmreadz(GUEST_RIP), (u64)l2_instruction);
+ }
+
+ __GUEST_ASSERT(exit_reason == insn->exit_reason[f],
+ "Wanted exit_reason '0x%x' for '%s', got '0x%x'",
+ insn->exit_reason[f], insn->name, exit_reason);
+
+ __GUEST_ASSERT(exit_insn_len == insn_len,
+ "Wanted insn_len '%u' for '%s', got '%u'",
+ insn_len, insn->name, exit_insn_len);
+ }
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t nested_test_data_gva;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(is_forced_emulation_enabled);
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vm_enable_cap(vm, KVM_CAP_EXCEPTION_PAYLOAD, -2ul);
+
+ if (kvm_cpu_has(X86_FEATURE_SVM))
+ vcpu_alloc_svm(vm, &nested_test_data_gva);
+ else
+ vcpu_alloc_vmx(vm, &nested_test_data_gva);
+
+ vcpu_args_set(vcpu, 1, nested_test_data_gva);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c
index e7efb2b35f8b..c0d84827f736 100644
--- a/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c
+++ b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c
@@ -73,7 +73,7 @@ static void check_2m_page_count(struct kvm_vm *vm, int expected_pages_2m)
{
int actual_pages_2m;
- actual_pages_2m = vm_get_stat(vm, "pages_2m");
+ actual_pages_2m = vm_get_stat(vm, pages_2m);
TEST_ASSERT(actual_pages_2m == expected_pages_2m,
"Unexpected 2m page count. Expected %d, got %d",
@@ -84,7 +84,7 @@ static void check_split_count(struct kvm_vm *vm, int expected_splits)
{
int actual_splits;
- actual_splits = vm_get_stat(vm, "nx_lpage_splits");
+ actual_splits = vm_get_stat(vm, nx_lpage_splits);
TEST_ASSERT(actual_splits == expected_splits,
"Unexpected NX huge page split count. Expected %d, got %d",
diff --git a/tools/testing/selftests/kvm/x86/pmu_counters_test.c b/tools/testing/selftests/kvm/x86/pmu_counters_test.c
index 698cb36989db..8aaaf25b6111 100644
--- a/tools/testing/selftests/kvm/x86/pmu_counters_test.c
+++ b/tools/testing/selftests/kvm/x86/pmu_counters_test.c
@@ -17,7 +17,7 @@
* Number of instructions in each loop. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE,
* 1 LOOP.
*/
-#define NUM_INSNS_PER_LOOP 3
+#define NUM_INSNS_PER_LOOP 4
/*
* Number of "extra" instructions that will be counted, i.e. the number of
@@ -29,10 +29,59 @@
/* Total number of instructions retired within the measured section. */
#define NUM_INSNS_RETIRED (NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS)
+/* Track which architectural events are supported by hardware. */
+static uint32_t hardware_pmu_arch_events;
static uint8_t kvm_pmu_version;
static bool kvm_has_perf_caps;
+#define X86_PMU_FEATURE_NULL \
+({ \
+ struct kvm_x86_pmu_feature feature = {}; \
+ \
+ feature; \
+})
+
+static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
+{
+ return !(*(u64 *)&event);
+}
+
+struct kvm_intel_pmu_event {
+ struct kvm_x86_pmu_feature gp_event;
+ struct kvm_x86_pmu_feature fixed_event;
+};
+
+/*
+ * Wrap the array to appease the compiler, as the macros used to construct each
+ * kvm_x86_pmu_feature use syntax that's only valid in function scope, and the
+ * compiler often thinks the feature definitions aren't compile-time constants.
+ */
+static struct kvm_intel_pmu_event intel_event_to_feature(uint8_t idx)
+{
+ const struct kvm_intel_pmu_event __intel_event_to_feature[] = {
+ [INTEL_ARCH_CPU_CYCLES_INDEX] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
+ [INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX] = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
+ /*
+ * Note, the fixed counter for reference cycles is NOT the same as the
+ * general purpose architectural event. The fixed counter explicitly
+ * counts at the same frequency as the TSC, whereas the GP event counts
+ * at a fixed, but uarch specific, frequency. Bundle them here for
+ * simplicity.
+ */
+ [INTEL_ARCH_REFERENCE_CYCLES_INDEX] = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED },
+ [INTEL_ARCH_LLC_REFERENCES_INDEX] = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_LLC_MISSES_INDEX] = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED },
+ };
+
+ kvm_static_assert(ARRAY_SIZE(__intel_event_to_feature) == NR_INTEL_ARCH_EVENTS);
+
+ return __intel_event_to_feature[idx];
+}
+
static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
void *guest_code,
uint8_t pmu_version,
@@ -42,6 +91,7 @@ static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
vm = vm_create_with_one_vcpu(vcpu, guest_code);
sync_global_to_guest(vm, kvm_pmu_version);
+ sync_global_to_guest(vm, hardware_pmu_arch_events);
/*
* Set PERF_CAPABILITIES before PMU version as KVM disallows enabling
@@ -98,14 +148,12 @@ static uint8_t guest_get_pmu_version(void)
* Sanity check that in all cases, the event doesn't count when it's disabled,
* and that KVM correctly emulates the write of an arbitrary value.
*/
-static void guest_assert_event_count(uint8_t idx,
- struct kvm_x86_pmu_feature event,
- uint32_t pmc, uint32_t pmc_msr)
+static void guest_assert_event_count(uint8_t idx, uint32_t pmc, uint32_t pmc_msr)
{
uint64_t count;
count = _rdpmc(pmc);
- if (!this_pmu_has(event))
+ if (!(hardware_pmu_arch_events & BIT(idx)))
goto sanity_checks;
switch (idx) {
@@ -126,7 +174,9 @@ static void guest_assert_event_count(uint8_t idx,
GUEST_ASSERT_NE(count, 0);
break;
case INTEL_ARCH_TOPDOWN_SLOTS_INDEX:
- GUEST_ASSERT(count >= NUM_INSNS_RETIRED);
+ __GUEST_ASSERT(count >= NUM_INSNS_RETIRED,
+ "Expected top-down slots >= %u, got count = %lu",
+ NUM_INSNS_RETIRED, count);
break;
default:
break;
@@ -162,75 +212,42 @@ do { \
"1:\n\t" \
clflush "\n\t" \
"mfence\n\t" \
+ "mov %[m], %%eax\n\t" \
FEP "loop 1b\n\t" \
FEP "mov %%edi, %%ecx\n\t" \
FEP "xor %%eax, %%eax\n\t" \
FEP "xor %%edx, %%edx\n\t" \
"wrmsr\n\t" \
:: "a"((uint32_t)_value), "d"(_value >> 32), \
- "c"(_msr), "D"(_msr) \
+ "c"(_msr), "D"(_msr), [m]"m"(kvm_pmu_version) \
); \
} while (0)
-#define GUEST_TEST_EVENT(_idx, _event, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \
+#define GUEST_TEST_EVENT(_idx, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \
do { \
- wrmsr(pmc_msr, 0); \
+ wrmsr(_pmc_msr, 0); \
\
if (this_cpu_has(X86_FEATURE_CLFLUSHOPT)) \
- GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt .", FEP); \
+ GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt %[m]", FEP); \
else if (this_cpu_has(X86_FEATURE_CLFLUSH)) \
- GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush .", FEP); \
+ GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush %[m]", FEP); \
else \
GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP); \
\
- guest_assert_event_count(_idx, _event, _pmc, _pmc_msr); \
+ guest_assert_event_count(_idx, _pmc, _pmc_msr); \
} while (0)
-static void __guest_test_arch_event(uint8_t idx, struct kvm_x86_pmu_feature event,
- uint32_t pmc, uint32_t pmc_msr,
+static void __guest_test_arch_event(uint8_t idx, uint32_t pmc, uint32_t pmc_msr,
uint32_t ctrl_msr, uint64_t ctrl_msr_value)
{
- GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, "");
+ GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, "");
if (is_forced_emulation_enabled)
- GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP);
-}
-
-#define X86_PMU_FEATURE_NULL \
-({ \
- struct kvm_x86_pmu_feature feature = {}; \
- \
- feature; \
-})
-
-static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
-{
- return !(*(u64 *)&event);
+ GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP);
}
static void guest_test_arch_event(uint8_t idx)
{
- const struct {
- struct kvm_x86_pmu_feature gp_event;
- struct kvm_x86_pmu_feature fixed_event;
- } intel_event_to_feature[] = {
- [INTEL_ARCH_CPU_CYCLES_INDEX] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
- [INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX] = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
- /*
- * Note, the fixed counter for reference cycles is NOT the same
- * as the general purpose architectural event. The fixed counter
- * explicitly counts at the same frequency as the TSC, whereas
- * the GP event counts at a fixed, but uarch specific, frequency.
- * Bundle them here for simplicity.
- */
- [INTEL_ARCH_REFERENCE_CYCLES_INDEX] = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED },
- [INTEL_ARCH_LLC_REFERENCES_INDEX] = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
- [INTEL_ARCH_LLC_MISSES_INDEX] = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
- [INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
- [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
- [INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED },
- };
-
uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
uint32_t pmu_version = guest_get_pmu_version();
/* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */
@@ -248,7 +265,7 @@ static void guest_test_arch_event(uint8_t idx)
else
base_pmc_msr = MSR_IA32_PERFCTR0;
- gp_event = intel_event_to_feature[idx].gp_event;
+ gp_event = intel_event_to_feature(idx).gp_event;
GUEST_ASSERT_EQ(idx, gp_event.f.bit);
GUEST_ASSERT(nr_gp_counters);
@@ -262,14 +279,14 @@ static void guest_test_arch_event(uint8_t idx)
if (guest_has_perf_global_ctrl)
wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i));
- __guest_test_arch_event(idx, gp_event, i, base_pmc_msr + i,
+ __guest_test_arch_event(idx, i, base_pmc_msr + i,
MSR_P6_EVNTSEL0 + i, eventsel);
}
if (!guest_has_perf_global_ctrl)
return;
- fixed_event = intel_event_to_feature[idx].fixed_event;
+ fixed_event = intel_event_to_feature(idx).fixed_event;
if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event))
return;
@@ -277,7 +294,7 @@ static void guest_test_arch_event(uint8_t idx)
wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
- __guest_test_arch_event(idx, fixed_event, i | INTEL_RDPMC_FIXED,
+ __guest_test_arch_event(idx, i | INTEL_RDPMC_FIXED,
MSR_CORE_PERF_FIXED_CTR0 + i,
MSR_CORE_PERF_GLOBAL_CTRL,
FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
@@ -331,9 +348,9 @@ __GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector, \
expect_gp ? "#GP" : "no fault", msr, vector) \
#define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected) \
- __GUEST_ASSERT(val == expected_val, \
+ __GUEST_ASSERT(val == expected, \
"Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx", \
- msr, expected_val, val);
+ msr, expected, val);
static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success,
uint64_t expected_val)
@@ -545,7 +562,6 @@ static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities,
static void test_intel_counters(void)
{
- uint8_t nr_arch_events = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
@@ -567,18 +583,26 @@ static void test_intel_counters(void)
/*
* Detect the existence of events that aren't supported by selftests.
- * This will (obviously) fail any time the kernel adds support for a
- * new event, but it's worth paying that price to keep the test fresh.
+ * This will (obviously) fail any time hardware adds support for a new
+ * event, but it's worth paying that price to keep the test fresh.
*/
- TEST_ASSERT(nr_arch_events <= NR_INTEL_ARCH_EVENTS,
+ TEST_ASSERT(this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH) <= NR_INTEL_ARCH_EVENTS,
"New architectural event(s) detected; please update this test (length = %u, mask = %x)",
- nr_arch_events, kvm_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));
+ this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH),
+ this_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));
/*
- * Force iterating over known arch events regardless of whether or not
- * KVM/hardware supports a given event.
+ * Iterate over known arch events irrespective of KVM/hardware support
+ * to verify that KVM doesn't reject programming of events just because
+ * the *architectural* encoding is unsupported. Track which events are
+ * supported in hardware; the guest side will validate supported events
+ * count correctly, even if *enumeration* of the event is unsupported
+ * by KVM and/or isn't exposed to the guest.
*/
- nr_arch_events = max_t(typeof(nr_arch_events), nr_arch_events, NR_INTEL_ARCH_EVENTS);
+ for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++) {
+ if (this_pmu_has(intel_event_to_feature(i).gp_event))
+ hardware_pmu_arch_events |= BIT(i);
+ }
for (v = 0; v <= max_pmu_version; v++) {
for (i = 0; i < ARRAY_SIZE(perf_caps); i++) {
@@ -594,8 +618,8 @@ static void test_intel_counters(void)
* vector length.
*/
if (v == pmu_version) {
- for (k = 1; k < (BIT(nr_arch_events) - 1); k++)
- test_arch_events(v, perf_caps[i], nr_arch_events, k);
+ for (k = 1; k < (BIT(NR_INTEL_ARCH_EVENTS) - 1); k++)
+ test_arch_events(v, perf_caps[i], NR_INTEL_ARCH_EVENTS, k);
}
/*
* Test single bits for all PMU version and lengths up
@@ -604,11 +628,11 @@ static void test_intel_counters(void)
* host length). Explicitly test a mask of '0' and all
* ones i.e. all events being available and unavailable.
*/
- for (j = 0; j <= nr_arch_events + 1; j++) {
+ for (j = 0; j <= NR_INTEL_ARCH_EVENTS + 1; j++) {
test_arch_events(v, perf_caps[i], j, 0);
test_arch_events(v, perf_caps[i], j, 0xff);
- for (k = 0; k < nr_arch_events; k++)
+ for (k = 0; k < NR_INTEL_ARCH_EVENTS; k++)
test_arch_events(v, perf_caps[i], j, BIT(k));
}
diff --git a/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c
index 916e04248fbb..917b6066cfc1 100644
--- a/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c
+++ b/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c
@@ -42,10 +42,7 @@ static void l2_guest_code(struct svm_test_data *svm)
x2apic_write_reg(APIC_ICR,
APIC_DEST_SELF | APIC_INT_ASSERT | INTR_IRQ_NUMBER);
- __asm__ __volatile__(
- "sti\n"
- "nop\n"
- );
+ sti_nop();
GUEST_ASSERT(vintr_irq_called);
GUEST_ASSERT(intr_irq_called);
diff --git a/tools/testing/selftests/kvm/x86/ucna_injection_test.c b/tools/testing/selftests/kvm/x86/ucna_injection_test.c
index 57f157c06b39..1e5e564523b3 100644
--- a/tools/testing/selftests/kvm/x86/ucna_injection_test.c
+++ b/tools/testing/selftests/kvm/x86/ucna_injection_test.c
@@ -86,7 +86,7 @@ static void ucna_injection_guest_code(void)
wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN);
/* Enables interrupt in guest. */
- asm volatile("sti");
+ sti();
/* Let user space inject the first UCNA */
GUEST_SYNC(SYNC_FIRST_UCNA);
diff --git a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c
index a76078a08ff8..35cb9de54a82 100644
--- a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c
+++ b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c
@@ -106,7 +106,8 @@ static void halter_guest_code(struct test_data_page *data)
data->halter_tpr = xapic_read_reg(APIC_TASKPRI);
data->halter_ppr = xapic_read_reg(APIC_PROCPRI);
data->hlt_count++;
- asm volatile("sti; hlt; cli");
+ safe_halt();
+ cli();
data->wake_count++;
}
}
@@ -465,6 +466,19 @@ int main(int argc, char *argv[])
cancel_join_vcpu_thread(threads[0], params[0].vcpu);
cancel_join_vcpu_thread(threads[1], params[1].vcpu);
+ /*
+ * If the host support Idle HLT, i.e. KVM *might* be using Idle HLT,
+ * then the number of HLT exits may be less than the number of HLTs
+ * that were executed, as Idle HLT elides the exit if the vCPU has an
+ * unmasked, pending IRQ (or NMI).
+ */
+ if (this_cpu_has(X86_FEATURE_IDLE_HLT))
+ TEST_ASSERT(data->hlt_count >= vcpu_get_stat(params[0].vcpu, halt_exits),
+ "HLT insns = %lu, HLT exits = %lu",
+ data->hlt_count, vcpu_get_stat(params[0].vcpu, halt_exits));
+ else
+ TEST_ASSERT_EQ(data->hlt_count, vcpu_get_stat(params[0].vcpu, halt_exits));
+
fprintf(stderr,
"Test successful after running for %d seconds.\n"
"Sending vCPU sent %lu IPIs to halting vCPU\n"
diff --git a/tools/testing/selftests/kvm/x86/xapic_state_test.c b/tools/testing/selftests/kvm/x86/xapic_state_test.c
index 88bcca188799..fdebff1165c7 100644
--- a/tools/testing/selftests/kvm/x86/xapic_state_test.c
+++ b/tools/testing/selftests/kvm/x86/xapic_state_test.c
@@ -18,7 +18,7 @@ struct xapic_vcpu {
static void xapic_guest_code(void)
{
- asm volatile("cli");
+ cli();
xapic_enable();
@@ -38,7 +38,7 @@ static void xapic_guest_code(void)
static void x2apic_guest_code(void)
{
- asm volatile("cli");
+ cli();
x2apic_enable();
diff --git a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c
index a59b3c799bb2..287829f850f7 100644
--- a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c
@@ -191,10 +191,7 @@ static void guest_code(void)
struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;
int i;
- __asm__ __volatile__(
- "sti\n"
- "nop\n"
- );
+ sti_nop();
/* Trigger an interrupt injection */
GUEST_SYNC(TEST_INJECT_VECTOR);
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 73ee88d6b043..5916f3b81c39 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -31,6 +31,7 @@ TEST_PROGS += veth.sh
TEST_PROGS += ioam6.sh
TEST_PROGS += gro.sh
TEST_PROGS += gre_gso.sh
+TEST_PROGS += gre_ipv6_lladdr.sh
TEST_PROGS += cmsg_so_mark.sh
TEST_PROGS += cmsg_so_priority.sh
TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh
diff --git a/tools/testing/selftests/net/gre_ipv6_lladdr.sh b/tools/testing/selftests/net/gre_ipv6_lladdr.sh
new file mode 100755
index 000000000000..5b34f6e1f831
--- /dev/null
+++ b/tools/testing/selftests/net/gre_ipv6_lladdr.sh
@@ -0,0 +1,177 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source ./lib.sh
+
+PAUSE_ON_FAIL="no"
+
+# The trap function handler
+#
+exit_cleanup_all()
+{
+ cleanup_all_ns
+
+ exit "${EXIT_STATUS}"
+}
+
+# Add fake IPv4 and IPv6 networks on the loopback device, to be used as
+# underlay by future GRE devices.
+#
+setup_basenet()
+{
+ ip -netns "${NS0}" link set dev lo up
+ ip -netns "${NS0}" address add dev lo 192.0.2.10/24
+ ip -netns "${NS0}" address add dev lo 2001:db8::10/64 nodad
+}
+
+# Check if network device has an IPv6 link-local address assigned.
+#
+# Parameters:
+#
+# * $1: The network device to test
+# * $2: An extra regular expression that should be matched (to verify the
+# presence of extra attributes)
+# * $3: The expected return code from grep (to allow checking the absence of
+# a link-local address)
+# * $4: The user visible name for the scenario being tested
+#
+check_ipv6_ll_addr()
+{
+ local DEV="$1"
+ local EXTRA_MATCH="$2"
+ local XRET="$3"
+ local MSG="$4"
+
+ RET=0
+ set +e
+ ip -netns "${NS0}" -6 address show dev "${DEV}" scope link | grep "fe80::" | grep -q "${EXTRA_MATCH}"
+ check_err_fail "${XRET}" $? ""
+ log_test "${MSG}"
+ set -e
+}
+
+# Create a GRE device and verify that it gets an IPv6 link-local address as
+# expected.
+#
+# Parameters:
+#
+# * $1: The device type (gre, ip6gre, gretap or ip6gretap)
+# * $2: The local underlay IP address (can be an IPv4, an IPv6 or "any")
+# * $3: The remote underlay IP address (can be an IPv4, an IPv6 or "any")
+# * $4: The IPv6 interface identifier generation mode to use for the GRE
+# device (eui64, none, stable-privacy or random).
+#
+test_gre_device()
+{
+ local GRE_TYPE="$1"
+ local LOCAL_IP="$2"
+ local REMOTE_IP="$3"
+ local MODE="$4"
+ local ADDR_GEN_MODE
+ local MATCH_REGEXP
+ local MSG
+
+ ip link add netns "${NS0}" name gretest type "${GRE_TYPE}" local "${LOCAL_IP}" remote "${REMOTE_IP}"
+
+ case "${MODE}" in
+ "eui64")
+ ADDR_GEN_MODE=0
+ MATCH_REGEXP=""
+ MSG="${GRE_TYPE}, mode: 0 (EUI64), ${LOCAL_IP} -> ${REMOTE_IP}"
+ XRET=0
+ ;;
+ "none")
+ ADDR_GEN_MODE=1
+ MATCH_REGEXP=""
+ MSG="${GRE_TYPE}, mode: 1 (none), ${LOCAL_IP} -> ${REMOTE_IP}"
+ XRET=1 # No link-local address should be generated
+ ;;
+ "stable-privacy")
+ ADDR_GEN_MODE=2
+ MATCH_REGEXP="stable-privacy"
+ MSG="${GRE_TYPE}, mode: 2 (stable privacy), ${LOCAL_IP} -> ${REMOTE_IP}"
+ XRET=0
+ # Initialise stable_secret (required for stable-privacy mode)
+ ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.stable_secret="2001:db8::abcd"
+ ;;
+ "random")
+ ADDR_GEN_MODE=3
+ MATCH_REGEXP="stable-privacy"
+ MSG="${GRE_TYPE}, mode: 3 (random), ${LOCAL_IP} -> ${REMOTE_IP}"
+ XRET=0
+ ;;
+ esac
+
+ # Check that IPv6 link-local address is generated when device goes up
+ ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}"
+ ip -netns "${NS0}" link set dev gretest up
+ check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "config: ${MSG}"
+
+ # Now disable link-local address generation
+ ip -netns "${NS0}" link set dev gretest down
+ ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode=1
+ ip -netns "${NS0}" link set dev gretest up
+
+ # Check that link-local address generation works when re-enabled while
+ # the device is already up
+ ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}"
+ check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "update: ${MSG}"
+
+ ip -netns "${NS0}" link del dev gretest
+}
+
+test_gre4()
+{
+ local GRE_TYPE
+ local MODE
+
+ for GRE_TYPE in "gre" "gretap"; do
+ printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n"
+
+ for MODE in "eui64" "none" "stable-privacy" "random"; do
+ test_gre_device "${GRE_TYPE}" 192.0.2.10 192.0.2.11 "${MODE}"
+ test_gre_device "${GRE_TYPE}" any 192.0.2.11 "${MODE}"
+ test_gre_device "${GRE_TYPE}" 192.0.2.10 any "${MODE}"
+ done
+ done
+}
+
+test_gre6()
+{
+ local GRE_TYPE
+ local MODE
+
+ for GRE_TYPE in "ip6gre" "ip6gretap"; do
+ printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n"
+
+ for MODE in "eui64" "none" "stable-privacy" "random"; do
+ test_gre_device "${GRE_TYPE}" 2001:db8::10 2001:db8::11 "${MODE}"
+ test_gre_device "${GRE_TYPE}" any 2001:db8::11 "${MODE}"
+ test_gre_device "${GRE_TYPE}" 2001:db8::10 any "${MODE}"
+ done
+ done
+}
+
+usage()
+{
+ echo "Usage: $0 [-p]"
+ exit 1
+}
+
+while getopts :p o
+do
+ case $o in
+ p) PAUSE_ON_FAIL="yes";;
+ *) usage;;
+ esac
+done
+
+setup_ns NS0
+
+set -e
+trap exit_cleanup_all EXIT
+
+setup_basenet
+
+test_gre4
+test_gre6
diff --git a/tools/testing/selftests/net/lib/xdp_dummy.bpf.c b/tools/testing/selftests/net/lib/xdp_dummy.bpf.c
index d988b2e0cee8..e73fab3edd9f 100644
--- a/tools/testing/selftests/net/lib/xdp_dummy.bpf.c
+++ b/tools/testing/selftests/net/lib/xdp_dummy.bpf.c
@@ -10,4 +10,10 @@ int xdp_dummy_prog(struct xdp_md *ctx)
return XDP_PASS;
}
+SEC("xdp.frags")
+int xdp_dummy_prog_frags(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/net/netfilter/br_netfilter.sh b/tools/testing/selftests/net/netfilter/br_netfilter.sh
index c28379a965d8..1559ba275105 100755
--- a/tools/testing/selftests/net/netfilter/br_netfilter.sh
+++ b/tools/testing/selftests/net/netfilter/br_netfilter.sh
@@ -13,6 +13,12 @@ source lib.sh
checktool "nft --version" "run test without nft tool"
+read t < /proc/sys/kernel/tainted
+if [ "$t" -ne 0 ];then
+ echo SKIP: kernel is tainted
+ exit $ksft_skip
+fi
+
cleanup() {
cleanup_all_ns
}
@@ -165,6 +171,7 @@ if [ "$t" -eq 0 ];then
echo PASS: kernel not tainted
else
echo ERROR: kernel is tainted
+ dmesg
ret=1
fi
diff --git a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh
index 6a764d70ab06..4788641717d9 100755
--- a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh
+++ b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh
@@ -4,6 +4,12 @@ source lib.sh
checktool "nft --version" "run test without nft tool"
+read t < /proc/sys/kernel/tainted
+if [ "$t" -ne 0 ];then
+ echo SKIP: kernel is tainted
+ exit $ksft_skip
+fi
+
cleanup() {
cleanup_all_ns
}
@@ -72,6 +78,7 @@ if [ "$t" -eq 0 ];then
echo PASS: kernel not tainted
else
echo ERROR: kernel is tainted
+ dmesg
exit 1
fi
diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh
index 785e3875a6da..784d1b46912b 100755
--- a/tools/testing/selftests/net/netfilter/nft_queue.sh
+++ b/tools/testing/selftests/net/netfilter/nft_queue.sh
@@ -593,6 +593,7 @@ EOF
echo "PASS: queue program exiting while packets queued"
else
echo "TAINT: queue program exiting while packets queued"
+ dmesg
ret=1
fi
}
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json
index 7126ec3485cb..2b61d8d79bde 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json
@@ -61,5 +61,30 @@
"teardown": [
"$TC qdisc del dev $DUMMY handle 1: root"
]
+ },
+ {
+ "id": "4009",
+ "name": "Reject creation of DRR class with classid TC_H_ROOT",
+ "category": [
+ "qdisc",
+ "drr"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY root handle ffff: drr",
+ "$TC filter add dev $DUMMY parent ffff: basic classid ffff:1",
+ "$TC class add dev $DUMMY parent ffff: classid ffff:1 drr",
+ "$TC filter add dev $DUMMY parent ffff: prio 1 u32 match u16 0x0000 0xfe00 at 2 flowid ffff:ffff"
+ ],
+ "cmdUnderTest": "$TC class add dev $DUMMY parent ffff: classid ffff:ffff drr",
+ "expExitCode": "2",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class drr ffff:ffff",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY root"
+ ]
}
]
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 54e959e7d68f..727b542074e7 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -75,7 +75,7 @@ config KVM_COMPAT
depends on KVM && COMPAT && !(S390 || ARM64 || RISCV)
config HAVE_KVM_IRQ_BYPASS
- bool
+ tristate
select IRQ_BYPASS_MANAGER
config HAVE_KVM_VCPU_ASYNC_IOCTL
@@ -104,6 +104,10 @@ config KVM_ELIDE_TLB_FLUSH_IF_YOUNG
depends on KVM_GENERIC_MMU_NOTIFIER
bool
+config KVM_MMU_LOCKLESS_AGING
+ depends on KVM_GENERIC_MMU_NOTIFIER
+ bool
+
config KVM_GENERIC_MEMORY_ATTRIBUTES
depends on KVM_GENERIC_MMU_NOTIFIER
bool
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 249ba5b72e9b..11e5d1e3f12e 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -149,7 +149,7 @@ irqfd_shutdown(struct work_struct *work)
/*
* It is now safe to release the object's resources
*/
-#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+#if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS)
irq_bypass_unregister_consumer(&irqfd->consumer);
#endif
eventfd_ctx_put(irqfd->eventfd);
@@ -274,7 +274,7 @@ static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd)
write_seqcount_end(&irqfd->irq_entry_sc);
}
-#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+#if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS)
void __attribute__((weak)) kvm_arch_irq_bypass_stop(
struct irq_bypass_consumer *cons)
{
@@ -424,7 +424,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
if (events & EPOLLIN)
schedule_work(&irqfd->inject);
-#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+#if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS)
if (kvm_arch_has_irq_bypass()) {
irqfd->consumer.token = (void *)irqfd->eventfd;
irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer;
@@ -609,14 +609,14 @@ void kvm_irq_routing_update(struct kvm *kvm)
spin_lock_irq(&kvm->irqfds.lock);
list_for_each_entry(irqfd, &kvm->irqfds.items, list) {
-#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+#if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS)
/* Under irqfds.lock, so can read irq_entry safely */
struct kvm_kernel_irq_routing_entry old = irqfd->irq_entry;
#endif
irqfd_update(kvm, irqfd);
-#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+#if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS)
if (irqfd->producer &&
kvm_arch_irqfd_route_changed(&old, &irqfd->irq_entry)) {
int ret = kvm_arch_update_irqfd_routing(
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ba0327e2d0d3..55153494ac70 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -489,6 +489,14 @@ void kvm_destroy_vcpus(struct kvm *kvm)
kvm_for_each_vcpu(i, vcpu, kvm) {
kvm_vcpu_destroy(vcpu);
xa_erase(&kvm->vcpu_array, i);
+
+ /*
+ * Assert that the vCPU isn't visible in any way, to ensure KVM
+ * doesn't trigger a use-after-free if destroying vCPUs results
+ * in VM-wide request, e.g. to flush remote TLBs when tearing
+ * down MMUs, or to mark the VM dead if a KVM_BUG_ON() fires.
+ */
+ WARN_ON_ONCE(xa_load(&kvm->vcpu_array, i) || kvm_get_vcpu(kvm, i));
}
atomic_set(&kvm->online_vcpus, 0);
@@ -517,6 +525,7 @@ struct kvm_mmu_notifier_range {
on_lock_fn_t on_lock;
bool flush_on_ret;
bool may_block;
+ bool lockless;
};
/*
@@ -551,8 +560,8 @@ static void kvm_null_fn(void)
node; \
node = interval_tree_iter_next(node, start, last)) \
-static __always_inline kvm_mn_ret_t __kvm_handle_hva_range(struct kvm *kvm,
- const struct kvm_mmu_notifier_range *range)
+static __always_inline kvm_mn_ret_t kvm_handle_hva_range(struct kvm *kvm,
+ const struct kvm_mmu_notifier_range *range)
{
struct kvm_mmu_notifier_return r = {
.ret = false,
@@ -571,6 +580,10 @@ static __always_inline kvm_mn_ret_t __kvm_handle_hva_range(struct kvm *kvm,
IS_KVM_NULL_FN(range->handler)))
return r;
+ /* on_lock will never be called for lockless walks */
+ if (WARN_ON_ONCE(range->lockless && !IS_KVM_NULL_FN(range->on_lock)))
+ return r;
+
idx = srcu_read_lock(&kvm->srcu);
for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
@@ -607,15 +620,18 @@ static __always_inline kvm_mn_ret_t __kvm_handle_hva_range(struct kvm *kvm,
gfn_range.start = hva_to_gfn_memslot(hva_start, slot);
gfn_range.end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, slot);
gfn_range.slot = slot;
+ gfn_range.lockless = range->lockless;
if (!r.found_memslot) {
r.found_memslot = true;
- KVM_MMU_LOCK(kvm);
- if (!IS_KVM_NULL_FN(range->on_lock))
- range->on_lock(kvm);
-
- if (IS_KVM_NULL_FN(range->handler))
- goto mmu_unlock;
+ if (!range->lockless) {
+ KVM_MMU_LOCK(kvm);
+ if (!IS_KVM_NULL_FN(range->on_lock))
+ range->on_lock(kvm);
+
+ if (IS_KVM_NULL_FN(range->handler))
+ goto mmu_unlock;
+ }
}
r.ret |= range->handler(kvm, &gfn_range);
}
@@ -625,7 +641,7 @@ static __always_inline kvm_mn_ret_t __kvm_handle_hva_range(struct kvm *kvm,
kvm_flush_remote_tlbs(kvm);
mmu_unlock:
- if (r.found_memslot)
+ if (r.found_memslot && !range->lockless)
KVM_MMU_UNLOCK(kvm);
srcu_read_unlock(&kvm->srcu, idx);
@@ -633,7 +649,7 @@ mmu_unlock:
return r;
}
-static __always_inline int kvm_handle_hva_range(struct mmu_notifier *mn,
+static __always_inline int kvm_age_hva_range(struct mmu_notifier *mn,
unsigned long start,
unsigned long end,
gfn_handler_t handler,
@@ -647,17 +663,18 @@ static __always_inline int kvm_handle_hva_range(struct mmu_notifier *mn,
.on_lock = (void *)kvm_null_fn,
.flush_on_ret = flush_on_ret,
.may_block = false,
+ .lockless = IS_ENABLED(CONFIG_KVM_MMU_LOCKLESS_AGING),
};
- return __kvm_handle_hva_range(kvm, &range).ret;
+ return kvm_handle_hva_range(kvm, &range).ret;
}
-static __always_inline int kvm_handle_hva_range_no_flush(struct mmu_notifier *mn,
- unsigned long start,
- unsigned long end,
- gfn_handler_t handler)
+static __always_inline int kvm_age_hva_range_no_flush(struct mmu_notifier *mn,
+ unsigned long start,
+ unsigned long end,
+ gfn_handler_t handler)
{
- return kvm_handle_hva_range(mn, start, end, handler, false);
+ return kvm_age_hva_range(mn, start, end, handler, false);
}
void kvm_mmu_invalidate_begin(struct kvm *kvm)
@@ -752,7 +769,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
* that guest memory has been reclaimed. This needs to be done *after*
* dropping mmu_lock, as x86's reclaim path is slooooow.
*/
- if (__kvm_handle_hva_range(kvm, &hva_range).found_memslot)
+ if (kvm_handle_hva_range(kvm, &hva_range).found_memslot)
kvm_arch_guest_memory_reclaimed(kvm);
return 0;
@@ -798,7 +815,7 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
};
bool wake;
- __kvm_handle_hva_range(kvm, &hva_range);
+ kvm_handle_hva_range(kvm, &hva_range);
/* Pairs with the increment in range_start(). */
spin_lock(&kvm->mn_invalidate_lock);
@@ -822,8 +839,8 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
{
trace_kvm_age_hva(start, end);
- return kvm_handle_hva_range(mn, start, end, kvm_age_gfn,
- !IS_ENABLED(CONFIG_KVM_ELIDE_TLB_FLUSH_IF_YOUNG));
+ return kvm_age_hva_range(mn, start, end, kvm_age_gfn,
+ !IS_ENABLED(CONFIG_KVM_ELIDE_TLB_FLUSH_IF_YOUNG));
}
static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn,
@@ -846,7 +863,7 @@ static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn,
* cadence. If we find this inaccurate, we might come up with a
* more sophisticated heuristic later.
*/
- return kvm_handle_hva_range_no_flush(mn, start, end, kvm_age_gfn);
+ return kvm_age_hva_range_no_flush(mn, start, end, kvm_age_gfn);
}
static int kvm_mmu_notifier_test_young(struct mmu_notifier *mn,
@@ -855,8 +872,8 @@ static int kvm_mmu_notifier_test_young(struct mmu_notifier *mn,
{
trace_kvm_test_age_hva(address);
- return kvm_handle_hva_range_no_flush(mn, address, address + 1,
- kvm_test_age_gfn);
+ return kvm_age_hva_range_no_flush(mn, address, address + 1,
+ kvm_test_age_gfn);
}
static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
@@ -1254,7 +1271,6 @@ static void kvm_destroy_vm(struct kvm *kvm)
kvm_destroy_pm_notifier(kvm);
kvm_uevent_notify_change(KVM_EVENT_DESTROY_VM, kvm);
kvm_destroy_vm_debugfs(kvm);
- kvm_arch_sync_events(kvm);
mutex_lock(&kvm_lock);
list_del(&kvm->vm_list);
mutex_unlock(&kvm_lock);